1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
43 #include "integrate.h"
52 #include "target-def.h"
54 static int hppa_use_dfa_pipeline_interface PARAMS ((void));
56 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
57 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
60 hppa_use_dfa_pipeline_interface ()
65 /* Return nonzero if there is a bypass for the output of
66 OUT_INSN and the fp store IN_INSN. */
68 hppa_fpstore_bypass_p (out_insn, in_insn)
69 rtx out_insn, in_insn;
71 enum machine_mode store_mode;
72 enum machine_mode other_mode;
75 if (recog_memoized (in_insn) < 0
76 || get_attr_type (in_insn) != TYPE_FPSTORE
77 || recog_memoized (out_insn) < 0)
80 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
82 set = single_set (out_insn);
86 other_mode = GET_MODE (SET_SRC (set));
88 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
/* Emit frame notes for dwarf2 call frame info only when the target
   provides INCOMING_RETURN_ADDR_RTX.  */
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
100 static int hppa_address_cost PARAMS ((rtx));
101 static bool hppa_rtx_costs PARAMS ((rtx, int, int, int *));
102 static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
103 static void pa_reorg PARAMS ((void));
104 static void pa_combine_instructions PARAMS ((void));
105 static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
106 static int forward_branch_p PARAMS ((rtx));
107 static int shadd_constant_p PARAMS ((int));
108 static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
109 static int compute_movstrsi_length PARAMS ((rtx));
110 static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
111 static void remove_useless_addtr_insns PARAMS ((int));
112 static void store_reg PARAMS ((int, int, int));
113 static void store_reg_modify PARAMS ((int, int, int));
114 static void load_reg PARAMS ((int, int, int));
115 static void set_reg_plus_d PARAMS ((int, int, int, int));
116 static void pa_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
117 static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
118 static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
119 static int pa_adjust_priority PARAMS ((rtx, int));
120 static int pa_issue_rate PARAMS ((void));
121 static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
123 static void pa_encode_section_info PARAMS ((tree, rtx, int));
124 static const char *pa_strip_name_encoding PARAMS ((const char *));
125 static bool pa_function_ok_for_sibcall PARAMS ((tree, tree));
126 static void pa_globalize_label PARAMS ((FILE *, const char *))
128 static void pa_asm_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
129 HOST_WIDE_INT, tree));
130 #if !defined(USE_COLLECT2)
131 static void pa_asm_out_constructor PARAMS ((rtx, int));
132 static void pa_asm_out_destructor PARAMS ((rtx, int));
134 static void pa_init_builtins PARAMS ((void));
135 static void copy_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
136 static int length_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
137 static struct deferred_plabel *get_plabel PARAMS ((const char *))
140 /* Save the operands last given to a compare for use when we
141 generate a scc or bcc insn. */
142 rtx hppa_compare_op0, hppa_compare_op1;
143 enum cmp_type hppa_branch_type;
145 /* Which cpu we are scheduling for. */
146 enum processor_type pa_cpu;
148 /* String to hold which cpu we are scheduling for. */
149 const char *pa_cpu_string;
151 /* Which architecture we are generating code for. */
152 enum architecture_type pa_arch;
154 /* String to hold which architecture we are generating code for. */
155 const char *pa_arch_string;
157 /* Counts for the number of callee-saved general and floating point
158 registers which were saved by the current function's prologue. */
159 static int gr_saved, fr_saved;
161 static rtx find_addr_reg PARAMS ((rtx));
163 /* Keep track of the number of bytes we have output in the CODE subspaces
164 during this compilation so we'll know when to emit inline long-calls. */
165 unsigned long total_code_bytes;
167 /* Variables to handle plabels that we discover are necessary at assembly
168 output time. They are output after the current function. */
169 struct deferred_plabel GTY(())
174 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
176 static size_t n_deferred_plabels = 0;
178 /* Initialize the GCC target structure. */
180 #undef TARGET_ASM_ALIGNED_HI_OP
181 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
182 #undef TARGET_ASM_ALIGNED_SI_OP
183 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
184 #undef TARGET_ASM_ALIGNED_DI_OP
185 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
186 #undef TARGET_ASM_UNALIGNED_HI_OP
187 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
188 #undef TARGET_ASM_UNALIGNED_SI_OP
189 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
190 #undef TARGET_ASM_UNALIGNED_DI_OP
191 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
192 #undef TARGET_ASM_INTEGER
193 #define TARGET_ASM_INTEGER pa_assemble_integer
195 #undef TARGET_ASM_FUNCTION_PROLOGUE
196 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
197 #undef TARGET_ASM_FUNCTION_EPILOGUE
198 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
200 #undef TARGET_SCHED_ADJUST_COST
201 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
202 #undef TARGET_SCHED_ADJUST_PRIORITY
203 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
204 #undef TARGET_SCHED_ISSUE_RATE
205 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
207 #undef TARGET_ENCODE_SECTION_INFO
208 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
209 #undef TARGET_STRIP_NAME_ENCODING
210 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
212 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
213 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
215 #undef TARGET_ASM_OUTPUT_MI_THUNK
216 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
217 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
218 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
220 #if !defined(USE_COLLECT2)
221 #undef TARGET_ASM_CONSTRUCTOR
222 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
223 #undef TARGET_ASM_DESTRUCTOR
224 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
227 #undef TARGET_INIT_BUILTINS
228 #define TARGET_INIT_BUILTINS pa_init_builtins
230 #undef TARGET_RTX_COSTS
231 #define TARGET_RTX_COSTS hppa_rtx_costs
232 #undef TARGET_ADDRESS_COST
233 #define TARGET_ADDRESS_COST hppa_address_cost
235 #undef TARGET_MACHINE_DEPENDENT_REORG
236 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
238 struct gcc_target targetm = TARGET_INITIALIZER;
243 if (pa_cpu_string == NULL)
244 pa_cpu_string = TARGET_SCHED_DEFAULT;
246 if (! strcmp (pa_cpu_string, "8000"))
248 pa_cpu_string = "8000";
249 pa_cpu = PROCESSOR_8000;
251 else if (! strcmp (pa_cpu_string, "7100"))
253 pa_cpu_string = "7100";
254 pa_cpu = PROCESSOR_7100;
256 else if (! strcmp (pa_cpu_string, "700"))
258 pa_cpu_string = "700";
259 pa_cpu = PROCESSOR_700;
261 else if (! strcmp (pa_cpu_string, "7100LC"))
263 pa_cpu_string = "7100LC";
264 pa_cpu = PROCESSOR_7100LC;
266 else if (! strcmp (pa_cpu_string, "7200"))
268 pa_cpu_string = "7200";
269 pa_cpu = PROCESSOR_7200;
271 else if (! strcmp (pa_cpu_string, "7300"))
273 pa_cpu_string = "7300";
274 pa_cpu = PROCESSOR_7300;
278 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
281 /* Set the instruction set architecture. */
282 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
284 pa_arch_string = "1.0";
285 pa_arch = ARCHITECTURE_10;
286 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
288 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
290 pa_arch_string = "1.1";
291 pa_arch = ARCHITECTURE_11;
292 target_flags &= ~MASK_PA_20;
293 target_flags |= MASK_PA_11;
295 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
297 pa_arch_string = "2.0";
298 pa_arch = ARCHITECTURE_20;
299 target_flags |= MASK_PA_11 | MASK_PA_20;
301 else if (pa_arch_string)
303 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
306 /* Unconditional branches in the delay slot are not compatible with dwarf2
307 call frame information. There is no benefit in using this optimization
308 on PA8000 and later processors. */
309 if (pa_cpu >= PROCESSOR_8000
310 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
311 || flag_unwind_tables)
312 target_flags &= ~MASK_JUMP_IN_DELAY;
314 if (flag_pic && TARGET_PORTABLE_RUNTIME)
316 warning ("PIC code generation is not supported in the portable runtime model\n");
319 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
321 warning ("PIC code generation is not compatible with fast indirect calls\n");
324 if (! TARGET_GAS && write_symbols != NO_DEBUG)
326 warning ("-g is only supported when using GAS on this processor,");
327 warning ("-g option disabled");
328 write_symbols = NO_DEBUG;
331 /* We only support the "big PIC" model now. And we always generate PIC
332 code when in 64bit mode. */
333 if (flag_pic == 1 || TARGET_64BIT)
336 /* We can't guarantee that .dword is available for 32-bit targets. */
337 if (UNITS_PER_WORD == 4)
338 targetm.asm_out.aligned_op.di = NULL;
340 /* The unaligned ops are only available when using GAS. */
343 targetm.asm_out.unaligned_op.hi = NULL;
344 targetm.asm_out.unaligned_op.si = NULL;
345 targetm.asm_out.unaligned_op.di = NULL;
/* Target hook: adjust the set of built-in functions for PA.  Some
   hosts lack fputc_unlocked, so drop that builtin there.  */
static void
pa_init_builtins ()
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}
357 /* Return nonzero only if OP is a register of mode MODE,
360 reg_or_0_operand (op, mode)
362 enum machine_mode mode;
364 return (op == CONST0_RTX (mode) || register_operand (op, mode));
367 /* Return nonzero if OP is suitable for use in a call to a named
370 For 2.5 try to eliminate either call_operand_address or
371 function_label_operand, they perform very similar functions. */
373 call_operand_address (op, mode)
375 enum machine_mode mode ATTRIBUTE_UNUSED;
377 return (GET_MODE (op) == word_mode
378 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
381 /* Return 1 if X contains a symbolic expression. We know these
382 expressions will have one of a few well defined forms, so
383 we need only check those forms. */
385 symbolic_expression_p (x)
389 /* Strip off any HIGH. */
390 if (GET_CODE (x) == HIGH)
393 return (symbolic_operand (x, VOIDmode));
397 symbolic_operand (op, mode)
399 enum machine_mode mode ATTRIBUTE_UNUSED;
401 switch (GET_CODE (op))
408 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
409 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
410 && GET_CODE (XEXP (op, 1)) == CONST_INT);
416 /* Return truth value of statement that OP is a symbolic memory
417 operand of mode MODE. */
420 symbolic_memory_operand (op, mode)
422 enum machine_mode mode ATTRIBUTE_UNUSED;
424 if (GET_CODE (op) == SUBREG)
425 op = SUBREG_REG (op);
426 if (GET_CODE (op) != MEM)
429 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
430 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
433 /* Return 1 if the operand is either a register or a memory operand that is
437 reg_or_nonsymb_mem_operand (op, mode)
439 enum machine_mode mode;
441 if (register_operand (op, mode))
444 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
450 /* Return 1 if the operand is either a register, zero, or a memory operand
451 that is not symbolic. */
454 reg_or_0_or_nonsymb_mem_operand (op, mode)
456 enum machine_mode mode;
458 if (register_operand (op, mode))
461 if (op == CONST0_RTX (mode))
464 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
470 /* Return 1 if the operand is a register operand or a non-symbolic memory
471 operand after reload. This predicate is used for branch patterns that
472 internally handle register reloading. We need to accept non-symbolic
473 memory operands after reload to ensure that the pattern is still valid
474 if reload didn't find a hard register for the operand. */
477 reg_before_reload_operand (op, mode)
479 enum machine_mode mode;
481 /* Don't accept a SUBREG since it will need a reload. */
482 if (GET_CODE (op) == SUBREG)
485 if (register_operand (op, mode))
489 && memory_operand (op, mode)
490 && ! symbolic_memory_operand (op, mode))
496 /* Accept any constant that can be moved in one instruction into a
499 cint_ok_for_move (intval)
500 HOST_WIDE_INT intval;
502 /* OK if ldo, ldil, or zdepi, can be used. */
503 return (CONST_OK_FOR_LETTER_P (intval, 'J')
504 || CONST_OK_FOR_LETTER_P (intval, 'N')
505 || CONST_OK_FOR_LETTER_P (intval, 'K'));
508 /* Accept anything that can be moved in one instruction into a general
511 move_operand (op, mode)
513 enum machine_mode mode;
515 if (register_operand (op, mode))
518 if (GET_CODE (op) == CONSTANT_P_RTX)
521 if (GET_CODE (op) == CONST_INT)
522 return cint_ok_for_move (INTVAL (op));
524 if (GET_CODE (op) == SUBREG)
525 op = SUBREG_REG (op);
526 if (GET_CODE (op) != MEM)
531 /* We consider a LO_SUM DLT reference a move_operand now since it has
532 been merged into the normal movsi/movdi patterns. */
533 if (GET_CODE (op) == LO_SUM
534 && GET_CODE (XEXP (op, 0)) == REG
535 && REG_OK_FOR_BASE_P (XEXP (op, 0))
536 && GET_CODE (XEXP (op, 1)) == UNSPEC
537 && GET_MODE (op) == Pmode)
540 /* Since move_operand is only used for source operands, we can always
541 allow scaled indexing! */
542 if (! TARGET_DISABLE_INDEXING
543 && GET_CODE (op) == PLUS
544 && ((GET_CODE (XEXP (op, 0)) == MULT
545 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
546 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
547 && INTVAL (XEXP (XEXP (op, 0), 1))
548 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
549 && GET_CODE (XEXP (op, 1)) == REG)
550 || (GET_CODE (XEXP (op, 1)) == MULT
551 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
552 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
553 && INTVAL (XEXP (XEXP (op, 1), 1))
554 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
555 && GET_CODE (XEXP (op, 0)) == REG)))
558 return memory_address_p (mode, op);
561 /* Accept REG and any CONST_INT that can be moved in one instruction into a
564 reg_or_cint_move_operand (op, mode)
566 enum machine_mode mode;
568 if (register_operand (op, mode))
571 if (GET_CODE (op) == CONST_INT)
572 return cint_ok_for_move (INTVAL (op));
578 pic_label_operand (op, mode)
580 enum machine_mode mode ATTRIBUTE_UNUSED;
585 switch (GET_CODE (op))
591 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
592 && GET_CODE (XEXP (op, 1)) == CONST_INT);
599 fp_reg_operand (op, mode)
601 enum machine_mode mode ATTRIBUTE_UNUSED;
603 return reg_renumber && FP_REG_P (op);
608 /* Return truth value of whether OP can be used as an operand in a
609 three operand arithmetic insn that accepts registers of mode MODE
610 or 14-bit signed integers. */
612 arith_operand (op, mode)
614 enum machine_mode mode;
616 return (register_operand (op, mode)
617 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
620 /* Return truth value of whether OP can be used as an operand in a
621 three operand arithmetic insn that accepts registers of mode MODE
622 or 11-bit signed integers. */
624 arith11_operand (op, mode)
626 enum machine_mode mode;
628 return (register_operand (op, mode)
629 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
632 /* Return truth value of whether OP can be used as an operand in a
635 adddi3_operand (op, mode)
637 enum machine_mode mode;
639 return (register_operand (op, mode)
640 || (GET_CODE (op) == CONST_INT
641 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
644 /* A constant integer suitable for use in a PRE_MODIFY memory
647 pre_cint_operand (op, mode)
649 enum machine_mode mode ATTRIBUTE_UNUSED;
651 return (GET_CODE (op) == CONST_INT
652 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
655 /* A constant integer suitable for use in a POST_MODIFY memory
658 post_cint_operand (op, mode)
660 enum machine_mode mode ATTRIBUTE_UNUSED;
662 return (GET_CODE (op) == CONST_INT
663 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
667 arith_double_operand (op, mode)
669 enum machine_mode mode;
671 return (register_operand (op, mode)
672 || (GET_CODE (op) == CONST_DOUBLE
673 && GET_MODE (op) == mode
674 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
675 && ((CONST_DOUBLE_HIGH (op) >= 0)
676 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
679 /* Return truth value of whether OP is an integer which fits the
680 range constraining immediate operands in three-address insns, or
681 is an integer register. */
684 ireg_or_int5_operand (op, mode)
686 enum machine_mode mode ATTRIBUTE_UNUSED;
688 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
689 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
692 /* Return nonzero if OP is an integer register, else return zero. */
694 ireg_operand (op, mode)
696 enum machine_mode mode ATTRIBUTE_UNUSED;
698 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
701 /* Return truth value of whether OP is an integer which fits the
702 range constraining immediate operands in three-address insns. */
705 int5_operand (op, mode)
707 enum machine_mode mode ATTRIBUTE_UNUSED;
709 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
713 uint5_operand (op, mode)
715 enum machine_mode mode ATTRIBUTE_UNUSED;
717 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
721 int11_operand (op, mode)
723 enum machine_mode mode ATTRIBUTE_UNUSED;
725 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
729 uint32_operand (op, mode)
731 enum machine_mode mode ATTRIBUTE_UNUSED;
733 #if HOST_BITS_PER_WIDE_INT > 32
734 /* All allowed constants will fit a CONST_INT. */
735 return (GET_CODE (op) == CONST_INT
736 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
738 return (GET_CODE (op) == CONST_INT
739 || (GET_CODE (op) == CONST_DOUBLE
740 && CONST_DOUBLE_HIGH (op) == 0));
745 arith5_operand (op, mode)
747 enum machine_mode mode;
749 return register_operand (op, mode) || int5_operand (op, mode);
752 /* True iff zdepi can be used to generate this CONST_INT.
753 zdepi first sign extends a 5 bit signed number to a given field
754 length, then places this field anywhere in a zero. */
757 unsigned HOST_WIDE_INT x;
759 unsigned HOST_WIDE_INT lsb_mask, t;
761 /* This might not be obvious, but it's at least fast.
762 This function is critical; we don't have the time loops would take. */
764 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
765 /* Return true iff t is a power of two. */
766 return ((t & (t - 1)) == 0);
769 /* True iff depi or extru can be used to compute (reg & mask).
770 Accept bit pattern like these:
776 unsigned HOST_WIDE_INT mask;
779 mask += mask & -mask;
780 return (mask & (mask - 1)) == 0;
783 /* True iff depi or extru can be used to compute (reg & OP). */
785 and_operand (op, mode)
787 enum machine_mode mode;
789 return (register_operand (op, mode)
790 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
793 /* True iff depi can be used to compute (reg | MASK). */
796 unsigned HOST_WIDE_INT mask;
798 mask += mask & -mask;
799 return (mask & (mask - 1)) == 0;
802 /* True iff depi can be used to compute (reg | OP). */
804 ior_operand (op, mode)
806 enum machine_mode mode ATTRIBUTE_UNUSED;
808 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
812 lhs_lshift_operand (op, mode)
814 enum machine_mode mode;
816 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
819 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
820 Such values can be the left hand side x in (x << r), using the zvdepi
823 lhs_lshift_cint_operand (op, mode)
825 enum machine_mode mode ATTRIBUTE_UNUSED;
827 unsigned HOST_WIDE_INT x;
828 if (GET_CODE (op) != CONST_INT)
830 x = INTVAL (op) >> 4;
831 return (x & (x + 1)) == 0;
835 arith32_operand (op, mode)
837 enum machine_mode mode;
839 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
843 pc_or_label_operand (op, mode)
845 enum machine_mode mode ATTRIBUTE_UNUSED;
847 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
850 /* Legitimize PIC addresses. If the address is already
851 position-independent, we return ORIG. Newly generated
852 position-independent addresses go to REG. If we need more
853 than one register, we lose. */
856 legitimize_pic_address (orig, mode, reg)
858 enum machine_mode mode;
862 /* Labels need special handling. */
863 if (pic_label_operand (orig, mode))
865 /* We do not want to go through the movXX expanders here since that
866 would create recursion.
868 Nor do we really want to call a generator for a named pattern
869 since that requires multiple patterns if we want to support
872 So instead we just emit the raw set, which avoids the movXX
873 expanders completely. */
874 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
875 current_function_uses_pic_offset_table = 1;
878 if (GET_CODE (orig) == SYMBOL_REF)
884 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
885 gen_rtx_HIGH (word_mode, orig)));
887 = gen_rtx_MEM (Pmode,
888 gen_rtx_LO_SUM (Pmode, reg,
889 gen_rtx_UNSPEC (Pmode,
893 current_function_uses_pic_offset_table = 1;
894 RTX_UNCHANGING_P (pic_ref) = 1;
895 emit_move_insn (reg, pic_ref);
898 else if (GET_CODE (orig) == CONST)
902 if (GET_CODE (XEXP (orig, 0)) == PLUS
903 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
909 if (GET_CODE (XEXP (orig, 0)) == PLUS)
911 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
912 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
913 base == reg ? 0 : reg);
916 if (GET_CODE (orig) == CONST_INT)
918 if (INT_14_BITS (orig))
919 return plus_constant (base, INTVAL (orig));
920 orig = force_reg (Pmode, orig);
922 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
923 /* Likewise, should we set special REG_NOTEs here? */
928 /* Try machine-dependent ways of modifying an illegitimate address
929 to be legitimate. If we find one, return the new, valid address.
930 This macro is used in only one place: `memory_address' in explow.c.
932 OLDX is the address as it was before break_out_memory_refs was called.
933 In some cases it is useful to look at this to decide what needs to be done.
935 MODE and WIN are passed so that this macro can use
936 GO_IF_LEGITIMATE_ADDRESS.
938 It is always safe for this macro to do nothing. It exists to recognize
939 opportunities to optimize the output.
941 For the PA, transform:
943 memory(X + <large int>)
947 if (<large int> & mask) >= 16
948 Y = (<large int> & ~mask) + mask + 1 Round up.
950 Y = (<large int> & ~mask) Round down.
952 memory (Z + (<large int> - Y));
954 This is for CSE to find several similar references, and only use one Z.
956 X can either be a SYMBOL_REF or REG, but because combine can not
957 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
958 D will not fit in 14 bits.
960 MODE_FLOAT references allow displacements which fit in 5 bits, so use
963 MODE_INT references allow displacements which fit in 14 bits, so use
966 This relies on the fact that most mode MODE_FLOAT references will use FP
967 registers and most mode MODE_INT references will use integer registers.
968 (In the rare case of an FP register used in an integer MODE, we depend
969 on secondary reloads to clean things up.)
972 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
973 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
974 addressing modes to be used).
976 Put X and Z into registers. Then put the entire expression into
980 hppa_legitimize_address (x, oldx, mode)
981 rtx x, oldx ATTRIBUTE_UNUSED;
982 enum machine_mode mode;
987 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
989 /* Strip off CONST. */
990 if (GET_CODE (x) == CONST)
993 /* Special case. Get the SYMBOL_REF into a register and use indexing.
994 That should always be safe. */
995 if (GET_CODE (x) == PLUS
996 && GET_CODE (XEXP (x, 0)) == REG
997 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
999 rtx reg = force_reg (Pmode, XEXP (x, 1));
1000 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1003 /* Note we must reject symbols which represent function addresses
1004 since the assembler/linker can't handle arithmetic on plabels. */
1005 if (GET_CODE (x) == PLUS
1006 && GET_CODE (XEXP (x, 1)) == CONST_INT
1007 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1008 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1009 || GET_CODE (XEXP (x, 0)) == REG))
1011 rtx int_part, ptr_reg;
1013 int offset = INTVAL (XEXP (x, 1));
1016 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1017 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1019 /* Choose which way to round the offset. Round up if we
1020 are >= halfway to the next boundary. */
1021 if ((offset & mask) >= ((mask + 1) / 2))
1022 newoffset = (offset & ~ mask) + mask + 1;
1024 newoffset = (offset & ~ mask);
1026 /* If the newoffset will not fit in 14 bits (ldo), then
1027 handling this would take 4 or 5 instructions (2 to load
1028 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1029 add the new offset and the SYMBOL_REF.) Combine can
1030 not handle 4->2 or 5->2 combinations, so do not create
1032 if (! VAL_14_BITS_P (newoffset)
1033 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1035 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1038 gen_rtx_HIGH (Pmode, const_part));
1041 gen_rtx_LO_SUM (Pmode,
1042 tmp_reg, const_part));
1046 if (! VAL_14_BITS_P (newoffset))
1047 int_part = force_reg (Pmode, GEN_INT (newoffset));
1049 int_part = GEN_INT (newoffset);
1051 ptr_reg = force_reg (Pmode,
1052 gen_rtx_PLUS (Pmode,
1053 force_reg (Pmode, XEXP (x, 0)),
1056 return plus_constant (ptr_reg, offset - newoffset);
1059 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1061 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1062 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1063 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1064 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1065 || GET_CODE (XEXP (x, 1)) == SUBREG)
1066 && GET_CODE (XEXP (x, 1)) != CONST)
1068 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1072 if (GET_CODE (reg1) != REG)
1073 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1075 reg2 = XEXP (XEXP (x, 0), 0);
1076 if (GET_CODE (reg2) != REG)
1077 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1079 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1080 gen_rtx_MULT (Pmode,
1086 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1088 Only do so for floating point modes since this is more speculative
1089 and we lose if it's an integer store. */
1090 if (GET_CODE (x) == PLUS
1091 && GET_CODE (XEXP (x, 0)) == PLUS
1092 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1093 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1094 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1095 && (mode == SFmode || mode == DFmode))
1098 /* First, try and figure out what to use as a base register. */
1099 rtx reg1, reg2, base, idx, orig_base;
1101 reg1 = XEXP (XEXP (x, 0), 1);
1106 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1107 then emit_move_sequence will turn on REG_POINTER so we'll know
1108 it's a base register below. */
1109 if (GET_CODE (reg1) != REG)
1110 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1112 if (GET_CODE (reg2) != REG)
1113 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1115 /* Figure out what the base and index are. */
1117 if (GET_CODE (reg1) == REG
1118 && REG_POINTER (reg1))
1121 orig_base = XEXP (XEXP (x, 0), 1);
1122 idx = gen_rtx_PLUS (Pmode,
1123 gen_rtx_MULT (Pmode,
1124 XEXP (XEXP (XEXP (x, 0), 0), 0),
1125 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1128 else if (GET_CODE (reg2) == REG
1129 && REG_POINTER (reg2))
1132 orig_base = XEXP (x, 1);
1139 /* If the index adds a large constant, try to scale the
1140 constant so that it can be loaded with only one insn. */
1141 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1142 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1143 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1144 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1146 /* Divide the CONST_INT by the scale factor, then add it to A. */
1147 int val = INTVAL (XEXP (idx, 1));
1149 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1150 reg1 = XEXP (XEXP (idx, 0), 0);
1151 if (GET_CODE (reg1) != REG)
1152 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1154 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1156 /* We can now generate a simple scaled indexed address. */
1159 (Pmode, gen_rtx_PLUS (Pmode,
1160 gen_rtx_MULT (Pmode, reg1,
1161 XEXP (XEXP (idx, 0), 1)),
1165 /* If B + C is still a valid base register, then add them. */
1166 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1167 && INTVAL (XEXP (idx, 1)) <= 4096
1168 && INTVAL (XEXP (idx, 1)) >= -4096)
1170 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1173 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1175 reg2 = XEXP (XEXP (idx, 0), 0);
1176 if (GET_CODE (reg2) != CONST_INT)
1177 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1179 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1180 gen_rtx_MULT (Pmode,
1186 /* Get the index into a register, then add the base + index and
1187 return a register holding the result. */
1189 /* First get A into a register. */
1190 reg1 = XEXP (XEXP (idx, 0), 0);
1191 if (GET_CODE (reg1) != REG)
1192 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1194 /* And get B into a register. */
1195 reg2 = XEXP (idx, 1);
1196 if (GET_CODE (reg2) != REG)
1197 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1199 reg1 = force_reg (Pmode,
1200 gen_rtx_PLUS (Pmode,
1201 gen_rtx_MULT (Pmode, reg1,
1202 XEXP (XEXP (idx, 0), 1)),
1205 /* Add the result to our base register and return. */
1206 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1210 /* Uh-oh. We might have an address for x[n-100000]. This needs
1211 special handling to avoid creating an indexed memory address
1212 with x-100000 as the base.
1214 If the constant part is small enough, then it's still safe because
1215 there is a guard page at the beginning and end of the data segment.
1217 Scaled references are common enough that we want to try and rearrange the
1218 terms so that we can use indexing for these addresses too. Only
1219 do the optimization for floatint point modes. */
1221 if (GET_CODE (x) == PLUS
1222 && symbolic_expression_p (XEXP (x, 1)))
1224 /* Ugly. We modify things here so that the address offset specified
1225 by the index expression is computed first, then added to x to form
1226 the entire address. */
1228 rtx regx1, regx2, regy1, regy2, y;
1230 /* Strip off any CONST. */
1232 if (GET_CODE (y) == CONST)
1235 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1237 /* See if this looks like
1238 (plus (mult (reg) (shadd_const))
1239 (const (plus (symbol_ref) (const_int))))
1241 Where const_int is small. In that case the const
1242 expression is a valid pointer for indexing.
1244 If const_int is big, but can be divided evenly by shadd_const
1245 and added to (reg). This allows more scaled indexed addresses. */
1246 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1247 && GET_CODE (XEXP (x, 0)) == MULT
1248 && GET_CODE (XEXP (y, 1)) == CONST_INT
1249 && INTVAL (XEXP (y, 1)) >= -4096
1250 && INTVAL (XEXP (y, 1)) <= 4095
1251 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1252 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1254 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1258 if (GET_CODE (reg1) != REG)
1259 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1261 reg2 = XEXP (XEXP (x, 0), 0);
1262 if (GET_CODE (reg2) != REG)
1263 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1265 return force_reg (Pmode,
1266 gen_rtx_PLUS (Pmode,
1267 gen_rtx_MULT (Pmode,
1272 else if ((mode == DFmode || mode == SFmode)
1273 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1274 && GET_CODE (XEXP (x, 0)) == MULT
1275 && GET_CODE (XEXP (y, 1)) == CONST_INT
1276 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1277 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1278 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1281 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1282 / INTVAL (XEXP (XEXP (x, 0), 1))));
1283 regx2 = XEXP (XEXP (x, 0), 0);
1284 if (GET_CODE (regx2) != REG)
1285 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1286 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1290 gen_rtx_PLUS (Pmode,
1291 gen_rtx_MULT (Pmode, regx2,
1292 XEXP (XEXP (x, 0), 1)),
1293 force_reg (Pmode, XEXP (y, 0))));
1295 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1296 && INTVAL (XEXP (y, 1)) >= -4096
1297 && INTVAL (XEXP (y, 1)) <= 4095)
1299 /* This is safe because of the guard page at the
1300 beginning and end of the data space. Just
1301 return the original address. */
1306 /* Doesn't look like one we can optimize. */
1307 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1308 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1309 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1310 regx1 = force_reg (Pmode,
1311 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1313 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1321 /* For the HPPA, REG and REG+CONST is cost 0
1322    and addresses involving symbolic constants are cost 2.
1324    PIC addresses are very expensive.
1326    It is no coincidence that this has the same structure
1327    as GO_IF_LEGITIMATE_ADDRESS.  */
/* Target hook implementation: return the cost of address rtx X by
   dispatching on its RTX code.  NOTE(review): the switch arms are
   elided in this listing -- confirm the per-code costs (REG/REG+CONST
   = 0, symbolic = 2, etc.) against the full source.  */
1330 hppa_address_cost (X)
1333   switch (GET_CODE (X))
1346 /* Compute a (partial) cost for rtx X.  Return true if the complete
1347    cost has been computed, and false if subexpressions should be
1348    scanned.  In either case, *TOTAL contains the cost result.  */
1351 hppa_rtx_costs (x, code, outer_code, total)
1353      int code, outer_code;
/* CONST_INT case: zero and 14-bit-immediate constants are the cheap
   ones (the exact *total assignments are elided in this listing).  */
1359       if (INTVAL (x) == 0)
1361       else if (INT_14_BITS (x))
/* FP zero constants are cheap except when they are the source of a
   SET (a store of FP zero still needs a register on PA).  */
1378       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1379 	  && outer_code != SET)
/* MULT: 3 insns for an FP multiply; 8 for an integer multiply when
   PA-1.1 FP hardware is usable (xmpyu); otherwise a 20-insn
   libcall-equivalent sequence.  */
1386       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1387 	*total = COSTS_N_INSNS (3);
1388       else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1389 	*total = COSTS_N_INSNS (8);
1391 	*total = COSTS_N_INSNS (20);
/* Division: 14 insns for FP; the 60-insn cost below presumably covers
   integer div/mod/udiv (its case labels are elided -- confirm).  */
1395       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1397 	  *total = COSTS_N_INSNS (14);
1405       *total = COSTS_N_INSNS (60);
1408     case PLUS: /* this includes shNadd insns */
1410       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1411 	*total = COSTS_N_INSNS (3);
1413 	*total = COSTS_N_INSNS (1);
/* Shift-type operations cost a single insn.  */
1419       *total = COSTS_N_INSNS (1);
1427 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1428    new rtx with the correct mode.  */
1430 force_mode (mode, orig)
1431      enum machine_mode mode;
/* Fast path: ORIG already has the requested mode.  */
1434   if (mode == GET_MODE (orig))
/* Only hard registers may be recreated in a different mode; reaching
   here with a pseudo indicates a bug (the elided statement presumably
   aborts -- confirm in full source).  */
1437   if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1440   return gen_rtx_REG (mode, REGNO (orig));
1443 /* Emit insns to move operands[1] into operands[0].
1445    Return 1 if we have written out everything that needs to be done to
1446    do the move.  Otherwise, return 0 and the caller will emit the move
1449    Note SCRATCH_REG may not be in the proper mode depending on how it
1450    will be used.  This routine is resposible for creating a new copy
1451    of SCRATCH_REG in the proper mode.  */
1454 emit_move_sequence (operands, mode, scratch_reg)
1456      enum machine_mode mode;
1459   register rtx operand0 = operands[0];
1460   register rtx operand1 = operands[1];
/* During reload, replace pseudos that did not get hard registers with
   their equivalent memory locations (plain REG case for operand0).  */
1464       && reload_in_progress && GET_CODE (operand0) == REG
1465       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1466     operand0 = reg_equiv_mem[REGNO (operand0)];
1467   else if (scratch_reg
1468 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1469 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1470 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1472      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1473 	the code which tracks sets/uses for delete_output_reload.  */
1474       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1475 				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1476 				 SUBREG_BYTE (operand0));
1477       operand0 = alter_subreg (&temp);
/* Same pseudo-to-memory substitution for operand1.  */
1481       && reload_in_progress && GET_CODE (operand1) == REG
1482       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1483     operand1 = reg_equiv_mem[REGNO (operand1)];
1484   else if (scratch_reg
1485 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1486 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1487 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1489      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1490 	the code which tracks sets/uses for delete_output_reload.  */
1491       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1492 				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1493 				 SUBREG_BYTE (operand1));
1494       operand1 = alter_subreg (&temp);
/* Pick up any address replacements reload has recorded for MEM
   operands so we operate on the final addresses.  */
1497   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1498       && ((tem = find_replacement (&XEXP (operand0, 0)))
1499 	  != XEXP (operand0, 0)))
1500     operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1501   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1502       && ((tem = find_replacement (&XEXP (operand1, 0)))
1503 	  != XEXP (operand1, 0)))
1504     operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1506   /* Handle secondary reloads for loads/stores of FP registers from
1507      REG+D addresses where D does not fit in 5 bits, including
1508      (subreg (mem (addr))) cases.  */
1509   if (fp_reg_operand (operand0, mode)
1510       && ((GET_CODE (operand1) == MEM
1511 	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1512 	  || ((GET_CODE (operand1) == SUBREG
1513 	       && GET_CODE (XEXP (operand1, 0)) == MEM
1514 	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1517       if (GET_CODE (operand1) == SUBREG)
1518 	operand1 = XEXP (operand1, 0);
1520       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1521 	 it in WORD_MODE regardless of what mode it was originally given
1523       scratch_reg = force_mode (word_mode, scratch_reg);
1525       /* D might not fit in 14 bits either; for such cases load D into
1527       if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1529 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1530 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1532 						       XEXP (XEXP (operand1, 0), 0),
1536 	emit_move_insn (scratch_reg, XEXP (operand1, 0));
1537       emit_insn (gen_rtx_SET (VOIDmode, operand0,
1538 			      gen_rtx_MEM (mode, scratch_reg)));
/* Mirror case: storing an FP register to a REG+D address that does not
   fit the FP load/store displacement field.  */
1541   else if (fp_reg_operand (operand1, mode)
1542 	   && ((GET_CODE (operand0) == MEM
1543 		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
1544 	       || ((GET_CODE (operand0) == SUBREG)
1545 		   && GET_CODE (XEXP (operand0, 0)) == MEM
1546 		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1549       if (GET_CODE (operand0) == SUBREG)
1550 	operand0 = XEXP (operand0, 0);
1552       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1553 	 it in WORD_MODE regardless of what mode it was originally given
1555       scratch_reg = force_mode (word_mode, scratch_reg);
1557       /* D might not fit in 14 bits either; for such cases load D into
1559       if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1561 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1562 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1565 						       XEXP (XEXP (operand0, 0),
1570 	emit_move_insn (scratch_reg, XEXP (operand0, 0));
1571       emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1575   /* Handle secondary reloads for loads of FP registers from constant
1576      expressions by forcing the constant into memory.
1578      use scratch_reg to hold the address of the memory location.
1580      The proper fix is to change PREFERRED_RELOAD_CLASS to return
1581      NO_REGS when presented with a const_int and a register class
1582      containing only FP registers.  Doing so unfortunately creates
1583      more problems than it solves.   Fix this for 2.5.  */
1584   else if (fp_reg_operand (operand0, mode)
1585 	   && CONSTANT_P (operand1)
1590       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1591 	 it in WORD_MODE regardless of what mode it was originally given
1593       scratch_reg = force_mode (word_mode, scratch_reg);
1595       /* Force the constant into memory and put the address of the
1596 	 memory location into scratch_reg.  */
1597       xoperands[0] = scratch_reg;
1598       xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1599       emit_move_sequence (xoperands, Pmode, 0);
1601       /* Now load the destination register.  */
1602       emit_insn (gen_rtx_SET (mode, operand0,
1603 			      gen_rtx_MEM (mode, scratch_reg)));
1606   /* Handle secondary reloads for SAR.  These occur when trying to load
1607      the SAR from memory, FP register, or with a constant.  */
1608   else if (GET_CODE (operand0) == REG
1609 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1610 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1611 	   && (GET_CODE (operand1) == MEM
1612 	       || GET_CODE (operand1) == CONST_INT
1613 	       || (GET_CODE (operand1) == REG
1614 		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1617       /* D might not fit in 14 bits either; for such cases load D into
1619       if (GET_CODE (operand1) == MEM
1620 	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
1622 	  /* We are reloading the address into the scratch register, so we
1623 	     want to make sure the scratch register is a full register.  */
1624 	  scratch_reg = force_mode (word_mode, scratch_reg);
1626 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1627 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1630 						        XEXP (XEXP (operand1, 0),
1634 	  /* Now we are going to load the scratch register from memory,
1635 	     we want to load it in the same width as the original MEM,
1636 	     which must be the same as the width of the ultimate destination,
1638 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1640 	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1645 	  /* We want to load the scratch register using the same mode as
1646 	     the ultimate destination.  */
1647 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1649 	  emit_move_insn (scratch_reg, operand1);
1652       /* And emit the insn to set the ultimate destination.  We know that
1653 	 the scratch register has the same mode as the destination at this
1655       emit_move_insn (operand0, scratch_reg);
1658   /* Handle most common case: storing into a register.  */
1659   else if (register_operand (operand0, mode))
1661       if (register_operand (operand1, mode)
1662 	  || (GET_CODE (operand1) == CONST_INT
1663 	      && cint_ok_for_move (INTVAL (operand1)))
1664 	  || (operand1 == CONST0_RTX (mode))
1665 	  || (GET_CODE (operand1) == HIGH
1666 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1667 	  /* Only `general_operands' can come here, so MEM is ok.  */
1668 	  || GET_CODE (operand1) == MEM)
1670 	  /* Run this case quickly.  */
1671 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
/* Storing into memory: handle DF zero via a fresh pseudo before
   reload, plain register/zero stores quickly, and otherwise force the
   source into a register first.  */
1675   else if (GET_CODE (operand0) == MEM)
1677       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1678 	  && !(reload_in_progress || reload_completed))
1680 	  rtx temp = gen_reg_rtx (DFmode);
1682 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1683 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1686       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1688 	  /* Run this case quickly.  */
1689 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1692       if (! (reload_in_progress || reload_completed))
1694 	  operands[0] = validize_mem (operand0);
1695 	  operands[1] = operand1 = force_reg (mode, operand1);
1699   /* Simplify the source if we need to.
1700      Note we do have to handle function labels here, even though we do
1701      not consider them legitimate constants.  Loop optimizations can
1702      call the emit_move_xxx with one as a source.  */
1703   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1704       || function_label_operand (operand1, mode)
1705       || (GET_CODE (operand1) == HIGH
1706 	  && symbolic_operand (XEXP (operand1, 0), mode)))
1710       if (GET_CODE (operand1) == HIGH)
1713 	  operand1 = XEXP (operand1, 0);
1715       if (symbolic_operand (operand1, mode))
1717 	  /* Argh.  The assembler and linker can't handle arithmetic
1720 	     So we force the plabel into memory, load operand0 from
1721 	     the memory location, then add in the constant part.  */
1722 	  if ((GET_CODE (operand1) == CONST
1723 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1724 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1725 	      || function_label_operand (operand1, mode))
1727 	      rtx temp, const_part;
1729 	      /* Figure out what (if any) scratch register to use.  */
1730 	      if (reload_in_progress || reload_completed)
1732 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1733 		  /* SCRATCH_REG will hold an address and maybe the actual
1734 		     data.  We want it in WORD_MODE regardless of what mode it
1735 		     was originally given to us.  */
1736 		  scratch_reg = force_mode (word_mode, scratch_reg);
1739 		scratch_reg = gen_reg_rtx (Pmode);
1741 	      if (GET_CODE (operand1) == CONST)
1743 		  /* Save away the constant part of the expression.  */
1744 		  const_part = XEXP (XEXP (operand1, 0), 1);
1745 		  if (GET_CODE (const_part) != CONST_INT)
1748 		  /* Force the function label into memory.  */
1749 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1753 		  /* No constant part.  */
1754 		  const_part = NULL_RTX;
1756 		  /* Force the function label into memory.  */
1757 		  temp = force_const_mem (mode, operand1);
1761 	      /* Get the address of the memory location.  PIC-ify it if
1763 	      temp = XEXP (temp, 0);
1765 		temp = legitimize_pic_address (temp, mode, scratch_reg);
1767 	      /* Put the address of the memory location into our destination
1770 	      emit_move_sequence (operands, mode, scratch_reg);
1772 	      /* Now load from the memory location into our destination
1774 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1775 	      emit_move_sequence (operands, mode, scratch_reg);
1777 	      /* And add back in the constant part.  */
1778 	      if (const_part != NULL_RTX)
1779 		expand_inc (operand0, const_part);
/* PIC symbolic source (the guard is elided -- presumably flag_pic):
   legitimize the address, forcing out-of-range (const (plus sym int))
   forms to memory during/after reload.  */
1788 	      if (reload_in_progress || reload_completed)
1790 		  temp = scratch_reg ? scratch_reg : operand0;
1791 		  /* TEMP will hold an address and maybe the actual
1792 		     data.  We want it in WORD_MODE regardless of what mode it
1793 		     was originally given to us.  */
1794 		  temp = force_mode (word_mode, temp);
1797 		temp = gen_reg_rtx (Pmode);
1799 	      /* (const (plus (symbol) (const_int))) must be forced to
1800 		 memory during/after reload if the const_int will not fit
1802 	      if (GET_CODE (operand1) == CONST
1803 		  && GET_CODE (XEXP (operand1, 0)) == PLUS
1804 		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1805 		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1806 		  && (reload_completed || reload_in_progress)
1809 		  operands[1] = force_const_mem (mode, operand1);
1810 		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1812 		  emit_move_sequence (operands, mode, temp);
1816 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
1817 		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1820 	  /* On the HPPA, references to data space are supposed to use dp,
1821 	     register 27, but showing it in the RTL inhibits various cse
1822 	     and loop optimizations.  */
1827 	      if (reload_in_progress || reload_completed)
1829 		  temp = scratch_reg ? scratch_reg : operand0;
1830 		  /* TEMP will hold an address and maybe the actual
1831 		     data.  We want it in WORD_MODE regardless of what mode it
1832 		     was originally given to us.  */
1833 		  temp = force_mode (word_mode, temp);
1836 		temp = gen_reg_rtx (mode);
1838 	      /* Loading a SYMBOL_REF into a register makes that register
1839 		 safe to be used as the base in an indexed address.
1841 		 Don't mark hard registers though.  That loses.  */
1842 	      if (GET_CODE (operand0) == REG
1843 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1844 		REG_POINTER (operand0) = 1;
1845 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1846 		REG_POINTER (temp) = 1;
/* Emit the classic HIGH/LO_SUM pair for a symbolic address.  */
1848 		set = gen_rtx_SET (mode, operand0, temp);
1850 		set = gen_rtx_SET (VOIDmode,
1852 				   gen_rtx_LO_SUM (mode, temp, operand1));
1854 	      emit_insn (gen_rtx_SET (VOIDmode,
1856 				      gen_rtx_HIGH (mode, operand1)));
/* Constants that cannot be moved in a single insn.  */
1862       else if (GET_CODE (operand1) != CONST_INT
1863 	       || ! cint_ok_for_move (INTVAL (operand1)))
1865 	  rtx extend = NULL_RTX;
1868 	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1869 	      && HOST_BITS_PER_WIDE_INT > 32
1870 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1872 	      HOST_WIDE_INT val = INTVAL (operand1);
1875 	      /* Extract the low order 32 bits of the value and sign extend.
1876 		 If the new value is the same as the original value, we can
1877 		 can use the original value as-is.  If the new value is
1878 		 different, we use it and insert the most-significant 32-bits
1879 		 of the original value into the final result.  */
1880 	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1881 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1884 #if HOST_BITS_PER_WIDE_INT > 32
1885 		  extend = GEN_INT (val >> 32);
1887 		  operand1 = GEN_INT (nval);
1891 	  if (reload_in_progress || reload_completed)
1894 	    temp = gen_reg_rtx (mode);
1896 	  /* We don't directly split DImode constants on 32-bit targets
1897 	     because PLUS uses an 11-bit immediate and the insn sequence
1898 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
1899 	  if (GET_CODE (operand1) == CONST_INT
1900 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
1902 	      /* Directly break constant into high and low parts.  This
1903 		 provides better optimization opportunities because various
1904 		 passes recognize constants split with PLUS but not LO_SUM.
1905 		 We use a 14-bit signed low part except when the addition
1906 		 of 0x4000 to the high part might change the sign of the
1908 	      HOST_WIDE_INT value = INTVAL (operand1);
1909 	      HOST_WIDE_INT low = value & 0x3fff;
1910 	      HOST_WIDE_INT high = value & ~ 0x3fff;
/* Guard against the sign-change corner cases noted above.  */
1914 	      if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1922 	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1923 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1927 	      emit_insn (gen_rtx_SET (VOIDmode, temp,
1928 				      gen_rtx_HIGH (mode, operand1)));
1929 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1932 	  emit_move_insn (operands[0], operands[1]);
/* 64-bit: patch the saved high 32 bits back in with an insv.  */
1934 	  if (extend != NULL_RTX)
1935 	    emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1941   /* Now have insn-emit do whatever it normally does.  */
1945 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1946    it will need a link/runtime reloc).  */
/* NOTE(review): the function signature and several case labels are
   elided in this listing.  The visible arms recurse over the tree's
   operands and OR the sub-results into `reloc'.  */
1954   switch (TREE_CODE (exp))
/* Binary node: check both operands (presumably PLUS/MINUS-like codes
   -- the case labels are elided).  */
1961       reloc = reloc_needed (TREE_OPERAND (exp, 0));
1962       reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1967     case NON_LVALUE_EXPR:
1968       reloc = reloc_needed (TREE_OPERAND (exp, 0));
/* CONSTRUCTOR: accumulate relocation needs over every element.  */
1974 	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1975 	  if (TREE_VALUE (link) != 0)
1976 	    reloc |= reloc_needed (TREE_VALUE (link));
1989 /* Does operand (which is a symbolic_operand) live in text space?
1990    If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
1994 read_only_operand (operand, mode)
1996      enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a CONST wrapper to reach the underlying SYMBOL_REF.  */
1998   if (GET_CODE (operand) == CONST)
1999     operand = XEXP (XEXP (operand, 0), 0);
/* Two variants follow; the guard between them is elided (presumably a
   flag_pic test -- confirm).  They differ only in whether a
   constant-pool symbol counts as read-only.  */
2002       if (GET_CODE (operand) == SYMBOL_REF)
2003 	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2007       if (GET_CODE (operand) == SYMBOL_REF)
2008 	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2014 /* Return the best assembler insn template
2015    for moving operands[1] into operands[0] as a fullword.  */
2017 singlemove_string (operands)
2020   HOST_WIDE_INT intval;
/* Store to memory: a single stw.  */
2022   if (GET_CODE (operands[0]) == MEM)
2023     return "stw %r1,%0";
2024   if (GET_CODE (operands[1]) == MEM)
/* CONST_DOUBLE source: only an SFmode value fits in one word; convert
   it to the equivalent CONST_INT bit pattern.  */
2026   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2031       if (GET_MODE (operands[1]) != SFmode)
2034       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2036       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2037       REAL_VALUE_TO_TARGET_SINGLE (d, i);
2039       operands[1] = GEN_INT (i);
2040       /* Fall through to CONST_INT case.  */
2042   if (GET_CODE (operands[1]) == CONST_INT)
2044       intval = INTVAL (operands[1]);
/* Pick the cheapest immediate-load form: 14-bit values in one insn,
   a lone ldil when the low 11 bits are clear, zdepi/depwi for
   shifted bit-string constants, else the two-insn ldil/ldo pair.  */
2046       if (VAL_14_BITS_P (intval))
2048       else if ((intval & 0x7ff) == 0)
2049 	return "ldil L'%1,%0";
2050       else if (zdepi_cint_p (intval))
2051 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2053 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
/* Register-to-register copy.  */
2055   return "copy %1,%0";
2059 /* Compute position (in OP[1]) and width (in OP[2])
2060    useful for copying IMM to a register using the zdepi
2061    instructions.  Store the immediate value to insert in OP[0].  */
2063 compute_zdepwi_operands (imm, op)
2064      unsigned HOST_WIDE_INT imm;
2069   /* Find the least significant set bit in IMM.  */
2070   for (lsb = 0; lsb < 32; lsb++)
2077   /* Choose variants based on *sign* of the 5-bit field.  */
2078   if ((imm & 0x10) == 0)
/* Positive field: a 4-bit-wide deposit suffices unless the bitstring
   runs off the top of the word.  */
2079     len = (lsb <= 28) ? 4 : 32 - lsb;
2082       /* Find the width of the bitstring in IMM.  */
2083       for (len = 5; len < 32; len++)
2085 	  if ((imm & (1 << len)) == 0)
2089       /* Sign extend IMM as a 5-bit value.  */
2090       imm = (imm & 0xf) - 0x10;
2098 /* Compute position (in OP[1]) and width (in OP[2])
2099    useful for copying IMM to a register using the depdi,z
2100    instructions.  Store the immediate value to insert in OP[0].  */
2102 compute_zdepdi_operands (imm, op)
2103      unsigned HOST_WIDE_INT imm;
2106   HOST_WIDE_INT lsb, len;
2108   /* Find the least significant set bit in IMM.  */
2109   for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2116   /* Choose variants based on *sign* of the 5-bit field.  */
2117   if ((imm & 0x10) == 0)
/* Positive field: 4-bit deposit unless the bitstring reaches the top
   of the host-wide word (64-bit analogue of the 32-bit routine).  */
2118     len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2119 	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2122       /* Find the width of the bitstring in IMM.  */
2123       for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2125 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2129       /* Sign extend IMM as a 5-bit value.  */
2130       imm = (imm & 0xf) - 0x10;
2138 /* Output assembler code to perform a doubleword move insn
2139    with operands OPERANDS.  */
2142 output_move_double (operands)
/* Operand classification: register, offsettable memory, other memory,
   constant, or random (unhandled).  */
2145   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2147   rtx addreg0 = 0, addreg1 = 0;
2149   /* First classify both operands.  */
2151   if (REG_P (operands[0]))
2153   else if (offsettable_memref_p (operands[0]))
2155   else if (GET_CODE (operands[0]) == MEM)
2160   if (REG_P (operands[1]))
2162   else if (CONSTANT_P (operands[1]))
2164   else if (offsettable_memref_p (operands[1]))
2166   else if (GET_CODE (operands[1]) == MEM)
2171   /* Check for the cases that the operand constraints are not
2172      supposed to allow to happen.  Abort if we get one,
2173      because generating code for these cases is painful.  */
2175   if (optype0 != REGOP && optype1 != REGOP)
2178    /* Handle auto decrementing and incrementing loads and stores
2179      specifically, since the structure of the function doesn't work
2180      for them without major modification.  Do it better when we learn
2181      this port about the general inc/dec addressing of PA.
2182      (This was written by tege.  Chide him if it doesn't work.)  */
2184   if (optype0 == MEMOP)
2186       /* We have to output the address syntax ourselves, since print_operand
2187 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2189       rtx addr = XEXP (operands[0], 0);
2190       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2192 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2194 	  operands[0] = XEXP (addr, 0);
2195 	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2198 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2200 	      /* No overlap between high target register and address
2201 		 register.  (We do this in a non-obvious way to
2202 		 save a register file writeback)  */
2203 	      if (GET_CODE (addr) == POST_INC)
2204 		return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2205 	      return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2210       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2212 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2214 	  operands[0] = XEXP (addr, 0);
2215 	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2218 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2220 	      /* No overlap between high target register and address
2221 		 register.  (We do this in a non-obvious way to
2222 		 save a register file writeback)  */
2223 	      if (GET_CODE (addr) == PRE_INC)
2224 		return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2225 	      return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
/* Same auto-inc/dec handling for a memory SOURCE operand.  */
2231   if (optype1 == MEMOP)
2233       /* We have to output the address syntax ourselves, since print_operand
2234 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2236       rtx addr = XEXP (operands[1], 0);
2237       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2239 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2241 	  operands[1] = XEXP (addr, 0);
2242 	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2245 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2247 	      /* No overlap between high target register and address
2248 		 register.  (We do this in a non-obvious way to
2249 		 save a register file writeback)  */
2250 	      if (GET_CODE (addr) == POST_INC)
2251 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2252 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2256 	      /* This is an undefined situation.  We should load into the
2257 		 address register *and* update that register.  Probably
2258 		 we don't need to handle this at all.  */
2259 	      if (GET_CODE (addr) == POST_INC)
2260 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2261 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2264       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2266 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2268 	  operands[1] = XEXP (addr, 0);
2269 	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2272 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2274 	      /* No overlap between high target register and address
2275 		 register.  (We do this in a non-obvious way to
2276 		 save a register file writeback)  */
2277 	      if (GET_CODE (addr) == PRE_INC)
2278 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2279 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2283 	      /* This is an undefined situation.  We should load into the
2284 		 address register *and* update that register.  Probably
2285 		 we don't need to handle this at all.  */
2286 	      if (GET_CODE (addr) == PRE_INC)
2287 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2288 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
/* Scaled-index load (base + mult): materialize the address with a
   shNadd into whichever half of the destination does not overlap the
   address, then do two plain word loads through it.  */
2291       else if (GET_CODE (addr) == PLUS
2292 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2294 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2296 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2300 	      xoperands[0] = high_reg;
2301 	      xoperands[1] = XEXP (addr, 1);
2302 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2303 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2304 	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2306 	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2312 	      xoperands[0] = high_reg;
2313 	      xoperands[1] = XEXP (addr, 1);
2314 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2315 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2316 	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2318 	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2323   /* If an operand is an unoffsettable memory ref, find a register
2324      we can increment temporarily to make it refer to the second word.  */
2326   if (optype0 == MEMOP)
2327     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2329   if (optype1 == MEMOP)
2330     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2332   /* Ok, we can do one word at a time.
2333      Normally we do the low-numbered word first.
2335      In either case, set up in LATEHALF the operands to use
2336      for the high-numbered word and in some cases alter the
2337      operands in OPERANDS to be suitable for the low-numbered word.  */
2339   if (optype0 == REGOP)
2340     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2341   else if (optype0 == OFFSOP)
2342     latehalf[0] = adjust_address (operands[0], SImode, 4);
2344     latehalf[0] = operands[0];
2346   if (optype1 == REGOP)
2347     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2348   else if (optype1 == OFFSOP)
2349     latehalf[1] = adjust_address (operands[1], SImode, 4);
2350   else if (optype1 == CNSTOP)
2351     split_double (operands[1], &operands[1], &latehalf[1]);
2353     latehalf[1] = operands[1];
2355   /* If the first move would clobber the source of the second one,
2356      do them in the other order.
2358      This can happen in two cases:
2360 	mem -> register where the first half of the destination register
2361 	is the same register used in the memory's address.  Reload
2362 	can create such insns.
2364 	mem in this case will be either register indirect or register
2365 	indirect plus a valid offset.
2367 	register -> register move where REGNO(dst) == REGNO(src + 1)
2368 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2370      Handle mem -> register case first.  */
2371   if (optype0 == REGOP
2372       && (optype1 == MEMOP || optype1 == OFFSOP)
2373       && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2376       /* Do the late half first.  */
2378 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2379       output_asm_insn (singlemove_string (latehalf), latehalf);
2383 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2384       return singlemove_string (operands);
2387   /* Now handle register -> register case.  */
2388   if (optype0 == REGOP && optype1 == REGOP
2389       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2391       output_asm_insn (singlemove_string (latehalf), latehalf);
2392       return singlemove_string (operands);
2395   /* Normal case: do the two words, low-numbered first.  */
2397   output_asm_insn (singlemove_string (operands), operands);
2399   /* Make any unoffsettable addresses point at high-numbered word.  */
2401     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2403     output_asm_insn ("ldo 4(%0),%0", &addreg1);
/* Emit the high-numbered word.  */
2406   output_asm_insn (singlemove_string (latehalf), latehalf);
2408   /* Undo the adds we just did.  */
2410     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2412     output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Output the assembler template for a doubleword FP move: FP-to-FP
   copy, FP load/store, or zeroing a general-register pair.  */
2418 output_fp_move_double (operands)
2421   if (FP_REG_P (operands[0]))
2423       if (FP_REG_P (operands[1])
2424 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2425 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2427 	output_asm_insn ("fldd%F1 %1,%0", operands);
2429   else if (FP_REG_P (operands[1]))
2431       output_asm_insn ("fstd%F0 %1,%0", operands);
/* Zero into a general-register pair: copy %r0 into both halves.  */
2433   else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2435       if (GET_CODE (operands[0]) == REG)
2438 	  xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2439 	  xoperands[0] = operands[0];
2440 	  output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2442       /* This is a pain.  You have to be prepared to deal with an
2443 	 arbitrary address here including pre/post increment/decrement.
2445 	 so avoid this in the MD.  */
2453 /* Return a REG that occurs in ADDR with coefficient 1.
2454    ADDR can be effectively incremented by incrementing REG.  */
2457 find_addr_reg (addr)
/* Walk down nested PLUS nodes, always following the branch that can
   contain the base register (skipping constant terms).  */
2460   while (GET_CODE (addr) == PLUS)
2462       if (GET_CODE (XEXP (addr, 0)) == REG)
2463 	addr = XEXP (addr, 0);
2464       else if (GET_CODE (XEXP (addr, 1)) == REG)
2465 	addr = XEXP (addr, 1);
2466       else if (CONSTANT_P (XEXP (addr, 0)))
2467 	addr = XEXP (addr, 1);
2468       else if (CONSTANT_P (XEXP (addr, 1)))
2469 	addr = XEXP (addr, 0);
/* Success only if the walk terminated at a bare REG (the failure path
   is elided in this listing -- presumably an abort).  */
2473   if (GET_CODE (addr) == REG)
2478 /* Emit code to perform a block move.
2480 OPERANDS[0] is the destination pointer as a REG, clobbered.
2481 OPERANDS[1] is the source pointer as a REG, clobbered.
2482 OPERANDS[2] is a register for temporary storage.
2483 OPERANDS[4] is the size as a CONST_INT
2484 OPERANDS[3] is a register for temporary storage.
2485 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2486 OPERANDS[6] is another temporary register.  */
2489 output_block_move (operands, size_is_constant)
2491 int size_is_constant ATTRIBUTE_UNUSED;
2493 int align = INTVAL (operands[5]);
2494 unsigned long n_bytes = INTVAL (operands[4]);
2496 /* We can't move more than four bytes at a time because the PA
2497 has no longer integer move insns.  (Could use fp mem ops?)  */
2501 /* Note that we know each loop below will execute at least twice
2502 (else we would have open-coded the copy).  */
/* Word-aligned case: unrolled-by-two word copy loop using post-modify
   loads/stores; addib branches back 3 insns (.-12).  */
2506 /* Pre-adjust the loop counter.  */
2507 operands[4] = GEN_INT (n_bytes - 8);
2508 output_asm_insn ("ldi %4,%2", operands);
2511 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2512 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2513 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2514 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2515 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2517 /* Handle the residual.  There could be up to 7 bytes of
2518 residual to copy!  */
2519 if (n_bytes % 8 != 0)
/* stby,e stores only the low n_bytes%4 bytes of the final word.  */
2521 operands[4] = GEN_INT (n_bytes % 4);
2522 if (n_bytes % 8 >= 4)
2523 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2524 if (n_bytes % 4 != 0)
2525 output_asm_insn ("ldw 0(%1),%6", operands);
2526 if (n_bytes % 8 >= 4)
2527 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2528 if (n_bytes % 4 != 0)
2529 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
/* Halfword-aligned case: same structure with 2-byte elements.  */
2534 /* Pre-adjust the loop counter.  */
2535 operands[4] = GEN_INT (n_bytes - 4);
2536 output_asm_insn ("ldi %4,%2", operands);
2539 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2540 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2541 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2542 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2543 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2545 /* Handle the residual.  */
2546 if (n_bytes % 4 != 0)
2548 if (n_bytes % 4 >= 2)
2549 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2550 if (n_bytes % 2 != 0)
2551 output_asm_insn ("ldb 0(%1),%6", operands);
2552 if (n_bytes % 4 >= 2)
2553 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2554 if (n_bytes % 2 != 0)
2555 output_asm_insn ("stb %6,0(%0)", operands);
/* Byte-aligned case: 1-byte elements; residual is at most one byte.  */
2560 /* Pre-adjust the loop counter.  */
2561 operands[4] = GEN_INT (n_bytes - 2);
2562 output_asm_insn ("ldi %4,%2", operands);
2565 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2566 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2567 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2568 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2569 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2571 /* Handle the residual.  */
2572 if (n_bytes % 2 != 0)
2574 output_asm_insn ("ldb 0(%1),%3", operands);
2575 output_asm_insn ("stb %3,0(%0)", operands);
2584 /* Count the number of insns necessary to handle this block move.
2586 Basic structure is the same as emit_block_move, except that we
2587 count insns rather than emit them.  */
2590 compute_movstrsi_length (insn)
/* Alignment and byte count come from the movstrsi PARALLEL operands,
   mirroring output_block_move's operands[5] and operands[4].  */
2593 rtx pat = PATTERN (insn);
2594 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2595 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2596 unsigned int n_insns = 0;
2598 /* We can't move more than four bytes at a time because the PA
2599 has no longer integer move insns.  (Could use fp mem ops?)  */
2603 /* The basic copying loop.  */
/* Residual handling: one load+store pair per partial chunk, matching
   the residual code emitted by output_block_move.  */
2607 if (n_bytes % (2 * align) != 0)
2609 if ((n_bytes % (2 * align)) >= align)
2612 if ((n_bytes % align) != 0)
2616 /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
/* Return the assembler template for a 32-bit AND of operands[1] with
   operands[2], storing the result in operands[0].  Contiguous-mask
   constants are turned into extract/deposit insns.  */
2622 output_and (operands)
2625 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2627 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2628 int ls0, ls1, ms0, p, len;
/* ls0 = lowest clear bit, ls1 = next set bit above it, ms0 = next
   clear bit above that; a single-run mask has ms0 == 32.  */
2630 for (ls0 = 0; ls0 < 32; ls0++)
2631 if ((mask & (1 << ls0)) == 0)
2634 for (ls1 = ls0; ls1 < 32; ls1++)
2635 if ((mask & (1 << ls1)) != 0)
2638 for (ms0 = ls1; ms0 < 32; ms0++)
2639 if ((mask & (1 << ms0)) == 0)
/* Low-order run of ones: use an unsigned extract.  */
2652 operands[2] = GEN_INT (len);
2653 return "{extru|extrw,u} %1,31,%2,%0";
2657 /* We could use this `depi' for the case above as well, but `depi'
2658 requires one more register file access than an `extru'.  */
/* Interior run of zeros: deposit zeros over it.  */
2663 operands[2] = GEN_INT (p);
2664 operands[3] = GEN_INT (len);
2665 return "{depi|depwi} 0,%2,%3,%0";
/* General case: plain register AND.  */
2669 return "and %1,%2,%0";
2672 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2673 storing the result in operands[0].  */
/* 64-bit analogue of output_and: contiguous-mask constants become
   extrd,u / depdi insns.  */
2675 output_64bit_and (operands)
2678 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2680 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2681 int ls0, ls1, ms0, p, len;
/* Same bit-run scan as output_and, widened to HOST_WIDE_INT.  */
2683 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2684 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2687 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2688 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2691 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2692 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2695 if (ms0 != HOST_BITS_PER_WIDE_INT)
2698 if (ls1 == HOST_BITS_PER_WIDE_INT)
/* Low-order run of ones: unsigned extract.  */
2705 operands[2] = GEN_INT (len);
2706 return "extrd,u %1,63,%2,%0";
2710 /* We could use this `depi' for the case above as well, but `depi'
2711 requires one more register file access than an `extru'.  */
/* Interior run of zeros: deposit zeros over it.  */
2716 operands[2] = GEN_INT (p);
2717 operands[3] = GEN_INT (len);
2718 return "depdi 0,%2,%3,%0";
/* General case: plain register AND.  */
2722 return "and %1,%2,%0";
/* Return the assembler template for a 32-bit inclusive-or of
   operands[1] with constant operands[2], storing the result in
   operands[0].  The constant is expected to be a single contiguous
   run of set bits, emitted with a deposit-immediate of -1.  */
2726 output_ior (operands)
2729 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2730 int bs0, bs1, p, len;
2732 if (INTVAL (operands[2]) == 0)
2733 return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = first clear bit above the run.  */
2735 for (bs0 = 0; bs0 < 32; bs0++)
2736 if ((mask & (1 << bs0)) != 0)
2739 for (bs1 = bs0; bs1 < 32; bs1++)
2740 if ((mask & (1 << bs1)) == 0)
/* Sanity check that the mask really is one contiguous run.  */
2743 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2749 operands[2] = GEN_INT (p);
2750 operands[3] = GEN_INT (len);
2751 return "{depi|depwi} -1,%2,%3,%0";
2754 /* Return a string to perform a bitwise inclusive-or of operands[1]
2755 with operands[2], storing the result in operands[0].  */
/* 64-bit analogue of output_ior: the constant mask must be a single
   contiguous run of set bits, emitted with depdi -1.  (The old header
   said "bitwise-and"; this function emits an OR.)  */
2757 output_64bit_ior (operands)
2760 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2761 int bs0, bs1, p, len;
2763 if (INTVAL (operands[2]) == 0)
2764 return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = first clear bit above the run.  */
2766 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2767 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2770 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2771 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
/* Sanity check that the mask really is one contiguous run.  */
2774 if (bs1 != HOST_BITS_PER_WIDE_INT
2775 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2781 operands[2] = GEN_INT (p);
2782 operands[3] = GEN_INT (len);
2783 return "depdi -1,%2,%3,%0";
2786 /* Target hook for assembling integer objects.  This code handles
2787 aligned SI and DI integers specially, since function references must
2788 be preceded by P%.  */
2791 pa_assemble_integer (x, size, aligned_p)
/* Function labels of word size get the P% plabel prefix so the linker
   builds a procedure label; everything else takes the default path.  */
2796 if (size == UNITS_PER_WORD && aligned_p
2797 && function_label_operand (x, VOIDmode))
2799 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2800 output_addr_const (asm_out_file, x);
2801 fputc ('\n', asm_out_file);
2804 return default_assemble_integer (x, size, aligned_p);
2807 /* Output an ascii string.  */
2809 output_ascii (file, p, size)
2816 unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
2818 /* The HP assembler can only take strings of 256 characters at one
2819 time.  This is a limitation on input line length, *not* the
2820 length of the string.  Sigh.  Even worse, it seems that the
2821 restriction is in number of input characters (see \xnn &
2822 \whatever).  So we have to do this very carefully.  */
2824 fputs ("\t.STRING \"", file);
2827 for (i = 0; i < size; i += 4)
/* Process up to 4 input chars per iteration; each can expand to at
   most 4 output chars (\xNN form), hence the 16-byte buffer.  */
2831 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2833 register unsigned int c = (unsigned char) p[i + io];
2835 if (c == '\"' || c == '\\')
2836 partial_output[co++] = '\\';
2837 if (c >= ' ' && c < 0177)
2838 partial_output[co++] = c;
/* Non-printable: emit as \xNN hex escape.  */
2842 partial_output[co++] = '\\';
2843 partial_output[co++] = 'x';
2845 hexd =  c  / 16 - 0 + '0';
2846 hexd -= '9' - 'a' + 1;
2847 partial_output[co++] = hexd;
2848 hexd =  c % 16 - 0 + '0';
2850 hexd -= '9' - 'a' + 1;
2851 partial_output[co++] = hexd;
/* Split the directive before hitting the assembler's input-line
   length limit (243 leaves headroom below 256).  */
2854 if (chars_output + co > 243)
2856 fputs ("\"\n\t.STRING \"", file);
2859 fwrite (partial_output, 1, (size_t) co, file);
2863 fputs ("\"\n", file);
2866 /* Try to rewrite floating point comparisons & branches to avoid
2867 useless add,tr insns.
2869 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2870 to see if FPCC is dead.  CHECK_NOTES is nonzero for the
2871 first attempt to remove useless add,tr insns.  It is zero
2872 for the second pass as reorg sometimes leaves bogus REG_DEAD
2875 When CHECK_NOTES is zero we can only eliminate add,tr insns
2876 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2879 remove_useless_addtr_insns (check_notes)
2883 static int pass = 0;
2885 /* This is fairly cheap, so always run it when optimizing.  */
2889 int fbranch_count = 0;
2891 /* Walk all the insns in this function looking for fcmp & fbranch
2892 instructions.  Keep track of how many of each we find.  */
2893 for (insn = get_insns (); insn; insn = next_insn (insn))
2897 /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
2898 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2901 tmp = PATTERN (insn);
2903 /* It must be a set.  */
2904 if (GET_CODE (tmp) != SET)
2907 /* If the destination is CCFP, then we've found an fcmp insn.  */
2908 tmp = SET_DEST (tmp);
2909 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2915 tmp = PATTERN (insn);
2916 /* If this is an fbranch instruction, bump the fbranch counter.  */
2917 if (GET_CODE (tmp) == SET
2918 && SET_DEST (tmp) == pc_rtx
2919 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2920 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2921 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2922 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
/* Second pass: for each fcmp whose result feeds a reversed FP branch,
   flip both the comparison code and the branch arms.  */
2930 /* Find all floating point compare + branch insns.  If possible,
2931 reverse the comparison & the branch to avoid add,tr insns.  */
2932 for (insn = get_insns (); insn; insn = next_insn (insn))
2936 /* Ignore anything that isn't an INSN.  */
2937 if (GET_CODE (insn) != INSN)
2940 tmp = PATTERN (insn);
2942 /* It must be a set.  */
2943 if (GET_CODE (tmp) != SET)
2946 /* The destination must be CCFP, which is register zero.  */
2947 tmp = SET_DEST (tmp);
2948 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2951 /* INSN should be a set of CCFP.
2953 See if the result of this insn is used in a reversed FP
2954 conditional branch.  If so, reverse our condition and
2955 the branch.  Doing so avoids useless add,tr insns.  */
2956 next = next_insn (insn);
2959 /* Jumps, calls and labels stop our search.  */
2960 if (GET_CODE (next) == JUMP_INSN
2961 || GET_CODE (next) == CALL_INSN
2962 || GET_CODE (next) == CODE_LABEL)
2965 /* As does another fcmp insn.  */
2966 if (GET_CODE (next) == INSN
2967 && GET_CODE (PATTERN (next)) == SET
2968 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2969 && REGNO (SET_DEST (PATTERN (next))) == 0)
2972 next = next_insn (next);
2975 /* Is NEXT_INSN a branch?  */
2977 && GET_CODE (next) == JUMP_INSN)
2979 rtx pattern = PATTERN (next);
2981 /* If it a reversed fp conditional branch (eg uses add,tr)
2982 and CCFP dies, then reverse our conditional and the branch
2983 to avoid the add,tr.  */
2984 if (GET_CODE (pattern) == SET
2985 && SET_DEST (pattern) == pc_rtx
2986 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2987 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2988 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2989 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2990 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2991 && (fcmp_count == fbranch_count
2993 && find_regno_note (next, REG_DEAD, 0))))
2995 /* Reverse the branch.  */
/* Swap the taken/fall-through arms of the IF_THEN_ELSE and force
   re-recognition of the modified jump.  */
2996 tmp = XEXP (SET_SRC (pattern), 1);
2997 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2998 XEXP (SET_SRC (pattern), 2) = tmp;
2999 INSN_CODE (next) = -1;
3001 /* Reverse our condition.  */
3002 tmp = PATTERN (insn);
3003 PUT_CODE (XEXP (tmp, 1),
3004 (reverse_condition_maybe_unordered
3005 (GET_CODE (XEXP (tmp, 1)))));
3015 /* You may have trouble believing this, but this is the 32 bit HP-PA
3020 Variable arguments (optional; any number may be allocated)
3022 SP-(4*(N+9)) arg word N
3027 Fixed arguments (must be allocated; may remain unused)
3036 SP-32 External Data Pointer (DP)
3038 SP-24 External/stub RP (RP')
3042 SP-8 Calling Stub RP (RP'')
3047 SP-0 Stack Pointer (points to next available address)
3051 /* This function saves registers as follows. Registers marked with ' are
3052 this function's registers (as opposed to the previous function's).
3053 If a frame_pointer isn't needed, r4 is saved as a general register;
3054 the space for the frame pointer is still allocated, though, to keep
3060 SP (FP') Previous FP
3061 SP + 4 Alignment filler (sigh)
3062 SP + 8 Space for locals reserved here.
3066 SP + n All call saved register used.
3070 SP + o All call saved fp registers used.
3074 SP + p (SP') points to next available address.
3078 /* Global variables set by output_function_prologue().  */
3079 /* Size of frame.  Need to know this to emit return insns from
3081 static int actual_fsize;
/* local_fsize: frame pointer + filler + locals portion of the frame;
   save_fregs: nonzero when any callee-saved FP register is saved.  */
3082 static int local_fsize, save_fregs;
3084 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3085 Handle case where DISP > 8k by using the add_high_const patterns.
3087 Note in DISP > 8k case, we will leave the high part of the address
3088 in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3091 store_reg (reg, disp, base)
3092 int reg, disp, base;
3094 rtx insn, dest, src, basereg;
3096 src = gen_rtx_REG (word_mode, reg);
3097 basereg = gen_rtx_REG (Pmode, base);
/* Small displacement: address directly with a 14-bit offset.  */
3098 if (VAL_14_BITS_P (disp))
3100 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3101 insn = emit_move_insn (dest, src);
/* Large displacement: build the high part in %r1, store via LO_SUM.  */
3105 rtx delta = GEN_INT (disp);
3106 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3107 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3108 emit_move_insn (tmpreg, high);
3109 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3110 insn = emit_move_insn (dest, src);
/* Attach a frame note describing the save as BASE+DISP so the unwind
   info is independent of the %r1 temporary.  */
3114 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3115 gen_rtx_SET (VOIDmode,
3116 gen_rtx_MEM (word_mode,
3117 gen_rtx_PLUS (word_mode, basereg,
3125 RTX_FRAME_RELATED_P (insn) = 1;
3128 /* Emit RTL to store REG at the memory location specified by BASE and then
3129 add MOD to BASE.  MOD must be <= 8k.  */
3132 store_reg_modify (base, reg, mod)
3135 rtx insn, basereg, srcreg, delta;
/* The post-modify addressing mode only accepts 14-bit displacements.  */
3137 if (! VAL_14_BITS_P (mod))
3140 basereg = gen_rtx_REG (Pmode, base);
3141 srcreg = gen_rtx_REG (word_mode, reg);
3142 delta = GEN_INT (mod);
3144 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3147 RTX_FRAME_RELATED_P (insn) = 1;
3149 /* RTX_FRAME_RELATED_P must be set on each frame related set
3150 in a parallel with more than one element.  Don't set
3151 RTX_FRAME_RELATED_P in the first set if reg is temporary
3152 register 1.  The effect of this operation is recorded in
3153 the initial copy.  */
3156 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3157 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3161 /* The first element of a PARALLEL is always processed if it is
3162 a SET.  Thus, we need an expression list for this case.  */
3164 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3165 gen_rtx_SET (VOIDmode, basereg,
3166 gen_rtx_PLUS (word_mode, basereg, delta)),
3172 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3173 where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3174 whether to add a frame note or not.
3176 In the DISP > 8k case, we leave the high part of the address in %r1.
3177 There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3180 set_reg_plus_d (reg, base, disp, note)
3181 int reg, base, disp, note;
/* Small displacement: a single add suffices.  */
3185 if (VAL_14_BITS_P (disp))
3187 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3188 plus_constant (gen_rtx_REG (Pmode, base), disp));
/* Large displacement: form the high part in %r1, then LO_SUM.  */
3192 rtx basereg = gen_rtx_REG (Pmode, base);
3193 rtx delta = GEN_INT (disp);
3195 emit_move_insn (gen_rtx_REG (Pmode, 1),
3196 gen_rtx_PLUS (Pmode, basereg,
3197 gen_rtx_HIGH (Pmode, delta)));
3198 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3199 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3203 if (DO_FRAME_NOTES && note)
3204 RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total stack frame size for the current function: locals
   (SIZE), frame marker, callee GR and FR saves, EH data registers,
   outgoing args, and final alignment.  Sets *FREGS_LIVE nonzero when
   any callee-saved FP register needs saving.  */
3208 compute_frame_size (size, fregs_live)
3215 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3216 be consistent with the rounding and size calculation done here.
3217 Change them at the same time.  */
3219 /* We do our own stack alignment.  First, round the size of the
3220 stack locals up to a word boundary.  */
3221 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3223 /* Space for previous frame pointer + filler.  If any frame is
3224 allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3225 waste some space here for the sake of HP compatibility.  The
3226 first slot is only used when the frame pointer is needed.  */
3227 if (size || frame_pointer_needed)
3228 size += STARTING_FRAME_OFFSET;
3230 /* If the current function calls __builtin_eh_return, then we need
3231 to allocate stack space for registers that will hold data for
3232 the exception handler.  */
3233 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3237 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3239 size += i * UNITS_PER_WORD;
3242 /* Account for space used by the callee general register saves.  */
3243 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3244 if (regs_ever_live[i])
3245 size += UNITS_PER_WORD;
3247 /* Account for space used by the callee floating point register saves.  */
3248 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3249 if (regs_ever_live[i]
3250 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3254 /* We always save both halves of the FP register, so always
3255 increment the frame size by 8 bytes.  */
3259 /* If any of the floating registers are saved, account for the
3260 alignment needed for the floating point register save block.  */
3263 size = (size + 7) & ~7;
3268 /* The various ABIs include space for the outgoing parameters in the
3269 size of the current function's stack frame.  We don't need to align
3270 for the outgoing arguments as their alignment is set by the final
3271 rounding for the frame as a whole.  */
3272 size += current_function_outgoing_args_size;
3274 /* Allocate space for the fixed frame marker.  This space must be
3275 allocated for any function that makes calls or allocates
3277 if (!current_function_is_leaf || size)
3278 size += TARGET_64BIT ? 48 : 32;
3280 /* Finally, round to the preferred stack boundary.  */
3281 return ((size + PREFERRED_STACK_BOUNDARY / 8 - 1)
3282 & ~(PREFERRED_STACK_BOUNDARY / 8 - 1));
3285 /* Generate the assembly code for function entry. FILE is a stdio
3286 stream to output the code to. SIZE is an int: how many units of
3287 temporary storage to allocate.
3289 Refer to the array `regs_ever_live' to determine which registers to
3290 save; `regs_ever_live[I]' is nonzero if register number I is ever
3291 used in the function. This function is responsible for knowing
3292 which registers should not be saved even if used. */
3294 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3295 of memory. If any fpu reg is used in the function, we allocate
3296 such a block here, at the bottom of the frame, just in case it's needed.
3298 If this function is a leaf procedure, then we may choose not
3299 to do a "save" insn. The decision about whether or not
3300 to do this is made in regclass.c. */
/* Emit the assembler directives that open a function: its label,
   .PROC, the .CALLINFO unwind descriptor, and .ENTRY.  The actual
   prologue RTL is generated by hppa_expand_prologue.  */
3303 pa_output_function_prologue (file, size)
3305 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3307 /* The function's label and associated .PROC must never be
3308 separated and must be output *after* any profiling declarations
3309 to avoid changing spaces/subspaces within a procedure.  */
3310 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3311 fputs ("\t.PROC\n", file);
3313 /* hppa_expand_prologue does the dirty work now.  We just need
3314 to output the assembler directives which denote the start
3316 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3317 if (regs_ever_live[2])
3318 fputs (",CALLS,SAVE_RP", file);
3320 fputs (",NO_CALLS", file);
3322 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3323 at the beginning of the frame and that it is used as the frame
3324 pointer for the frame.  We do this because our current frame
3325 layout doesn't conform to that specified in the HP runtime
3326 documentation and we need a way to indicate to programs such as
3327 GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3328 isn't used by HP compilers but is supported by the assembler.
3329 However, SAVE_SP is supposed to indicate that the previous stack
3330 pointer has been saved in the frame marker.  */
3331 if (frame_pointer_needed)
3332 fputs (",SAVE_SP", file);
3334 /* Pass on information about the number of callee register saves
3335 performed in the prologue.
3337 The compiler is supposed to pass the highest register number
3338 saved, the assembler then has to adjust that number before
3339 entering it into the unwind descriptor (to account for any
3340 caller saved registers with lower register numbers than the
3341 first callee saved register).  */
3343 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3346 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3348 fputs ("\n\t.ENTRY\n", file);
/* Second, note-free pass over the add,tr eliminator (see its header
   comment for why REG_DEAD notes cannot be trusted here).  */
3350 remove_useless_addtr_insns (0);
/* Generate the prologue RTL: save RP, allocate the frame (setting up
   the frame pointer when needed), save callee GRs, the PIC register,
   and callee FRs.  Sets local_fsize/actual_fsize/save_fregs for use by
   pa_output_function_prologue and hppa_expand_epilogue.
   NOTE(review): this excerpt is elided; braces, some else arms and
   declarations are not visible.  */
3354 hppa_expand_prologue ()
3356 int merge_sp_adjust_with_store = 0;
3357 int size = get_frame_size ();
3365 /* Compute total size for frame pointer, filler, locals and rounding to
3366 the next word boundary.  Similar code appears in compute_frame_size
3367 and must be changed in tandem with this code.  */
3368 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3369 if (local_fsize || frame_pointer_needed)
3370 local_fsize += STARTING_FRAME_OFFSET;
3372 actual_fsize = compute_frame_size (size, &save_fregs);
3374 /* Compute a few things we will use often.  */
3375 tmpreg = gen_rtx_REG (word_mode, 1);
3377 /* Save RP first.  The calling conventions manual states RP will
3378 always be stored into the caller's frame at sp - 20 or sp - 16
3379 depending on which ABI is in use.  */
3380 if (regs_ever_live[2] || current_function_calls_eh_return)
3381 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3383 /* Allocate the local frame and set up the frame pointer if needed.  */
3384 if (actual_fsize != 0)
3386 if (frame_pointer_needed)
3388 /* Copy the old frame pointer temporarily into %r1.  Set up the
3389 new stack pointer, then store away the saved old frame pointer
3390 into the stack at sp and at the same time update the stack
3391 pointer by actual_fsize bytes.  Two versions, first
3392 handles small (<8k) frames.  The second handles large (>=8k)
3394 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3397 /* We need to record the frame pointer save here since the
3398 new frame pointer is set in the following insn.  */
3399 RTX_FRAME_RELATED_P (insn) = 1;
3401 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3402 gen_rtx_SET (VOIDmode,
3403 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3408 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3410 RTX_FRAME_RELATED_P (insn) = 1;
3412 if (VAL_14_BITS_P (actual_fsize))
3413 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3416 /* It is incorrect to store the saved frame pointer at *sp,
3417 then increment sp (writes beyond the current stack boundary).
3419 So instead use stwm to store at *sp and post-increment the
3420 stack pointer as an atomic operation.  Then increment sp to
3421 finish allocating the new frame.  */
3422 int adjust1 = 8192 - 64;
3423 int adjust2 = actual_fsize - adjust1;
3425 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3426 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3430 /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3431 we need to store the previous stack pointer (frame pointer)
3432 into the frame marker on targets that use the HP unwind
3433 library.  This allows the HP unwind library to be used to
3434 unwind GCC frames.  However, we are not fully compatible
3435 with the HP library because our frame layout differs from
3436 that specified in the HP runtime specification.
3438 We don't want a frame note on this instruction as the frame
3439 marker moves during dynamic stack allocation.
3441 This instruction also serves as a blockage to prevent
3442 register spills from being scheduled before the stack
3443 pointer is raised.  This is necessary as we store
3444 registers using the frame pointer as a base register,
3445 and the frame pointer is set before sp is raised.  */
3446 if (TARGET_HPUX_UNWIND_LIBRARY)
3448 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3449 GEN_INT (TARGET_64BIT ? -8 : -4));
3451 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3455 emit_insn (gen_blockage ());
3457 /* no frame pointer needed.  */
3460 /* In some cases we can perform the first callee register save
3461 and allocating the stack frame at the same time.   If so, just
3462 make a note of it and defer allocating the frame until saving
3463 the callee registers.  */
3464 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3465 merge_sp_adjust_with_store = 1;
3466 /* Can not optimize.  Adjust the stack frame by actual_fsize
3469 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3474 /* Normal register save.
3476 Do not save the frame pointer in the frame_pointer_needed case.  It
3477 was done earlier.  */
3478 if (frame_pointer_needed)
3480 offset = local_fsize;
3482 /* Saving the EH return data registers in the frame is the simplest
3483 way to get the frame unwind information emitted.  We put them
3484 just before the general registers.  */
3485 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3487 unsigned int i, regno;
3491 regno = EH_RETURN_DATA_REGNO (i);
3492 if (regno == INVALID_REGNUM)
3495 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3496 offset += UNITS_PER_WORD;
/* Callee-saved GRs %r18..%r4 relative to the frame pointer.  */
3500 for (i = 18; i >= 4; i--)
3501 if (regs_ever_live[i] && ! call_used_regs[i])
3503 store_reg (i, offset, FRAME_POINTER_REGNUM);
3504 offset += UNITS_PER_WORD;
3507 /* Account for %r3 which is saved in a special place.  */
3510 /* No frame pointer needed.  */
3513 offset = local_fsize - actual_fsize;
3515 /* Saving the EH return data registers in the frame is the simplest
3516 way to get the frame unwind information emitted.  */
3517 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3519 unsigned int i, regno;
3523 regno = EH_RETURN_DATA_REGNO (i);
3524 if (regno == INVALID_REGNUM)
3527 /* If merge_sp_adjust_with_store is nonzero, then we can
3528 optimize the first save.  */
3529 if (merge_sp_adjust_with_store)
3531 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3532 merge_sp_adjust_with_store = 0;
3535 store_reg (regno, offset, STACK_POINTER_REGNUM);
3536 offset += UNITS_PER_WORD;
/* Callee-saved GRs %r18..%r3 relative to the stack pointer.  */
3540 for (i = 18; i >= 3; i--)
3541 if (regs_ever_live[i] && ! call_used_regs[i])
3543 /* If merge_sp_adjust_with_store is nonzero, then we can
3544 optimize the first GR save.  */
3545 if (merge_sp_adjust_with_store)
3547 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3548 merge_sp_adjust_with_store = 0;
3551 store_reg (i, offset, STACK_POINTER_REGNUM);
3552 offset += UNITS_PER_WORD;
3556 /* If we wanted to merge the SP adjustment with a GR save, but we never
3557 did any GR saves, then just emit the adjustment here.  */
3558 if (merge_sp_adjust_with_store)
3559 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3563 /* The hppa calling conventions say that %r19, the pic offset
3564 register, is saved at sp - 32 (in this function's frame)
3565 when generating PIC code.  FIXME:  What is the correct thing
3566 to do for functions which make no calls and allocate no
3567 frame?  Do we need to allocate a frame, or can we just omit
3568 the save?   For now we'll just omit the save.  */
3569 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3570 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3572 /* Align pointer properly (doubleword boundary).  */
3573 offset = (offset + 7) & ~7;
3575 /* Floating point register store.  */
3580 /* First get the frame or stack pointer to the start of the FP register
3582 if (frame_pointer_needed)
3584 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3585 base = frame_pointer_rtx;
3589 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3590 base = stack_pointer_rtx;
3593 /* Now actually save the FP registers.  */
3594 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3596 if (regs_ever_live[i]
3597 || (! TARGET_64BIT && regs_ever_live[i + 1]))
/* Save through %r1 with post-increment addressing.  */
3599 rtx addr, insn, reg;
3600 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3601 reg = gen_rtx_REG (DFmode, i);
3602 insn = emit_move_insn (addr, reg);
3605 RTX_FRAME_RELATED_P (insn) = 1;
/* Frame note: describe the save as a plain BASE+offset store.  */
3608 rtx mem = gen_rtx_MEM (DFmode,
3609 plus_constant (base, offset));
3611 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3612 gen_rtx_SET (VOIDmode, mem, reg),
/* 32-bit: the DF save is two SF halves; note both in a SEQUENCE.  */
3617 rtx meml = gen_rtx_MEM (SFmode,
3618 plus_constant (base, offset));
3619 rtx memr = gen_rtx_MEM (SFmode,
3620 plus_constant (base, offset + 4));
3621 rtx regl = gen_rtx_REG (SFmode, i);
3622 rtx regr = gen_rtx_REG (SFmode, i + 1);
3623 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3624 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3627 RTX_FRAME_RELATED_P (setl) = 1;
3628 RTX_FRAME_RELATED_P (setr) = 1;
3629 vec = gen_rtvec (2, setl, setr);
3631 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3632 gen_rtx_SEQUENCE (VOIDmode, vec),
3636 offset += GET_MODE_SIZE (DFmode);
3643 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3644 Handle case where DISP > 8k by using the add_high_const patterns.  */
3647 load_reg (reg, disp, base)
3648 int reg, disp, base;
3650 rtx src, dest, basereg;
3652 dest = gen_rtx_REG (word_mode, reg);
3653 basereg = gen_rtx_REG (Pmode, base);
/* Small displacement: address directly with a 14-bit offset.  */
3654 if (VAL_14_BITS_P (disp))
3656 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3657 emit_move_insn (dest, src);
/* Large displacement: form the high part in %r1, load via LO_SUM.  */
3661 rtx delta = GEN_INT (disp);
3662 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3663 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3664 emit_move_insn (tmpreg, high);
3665 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3666 emit_move_insn (dest, src);
3670 /* This function generates the assembly code for function exit.
3671 Args are as for output_function_prologue ().
3673 The function epilogue should not depend on the current stack
3674 pointer!  It should use the frame pointer only.  This is mandatory
3675 because of alloca; we also take advantage of it to omit stack
3676 adjustments before returning.  */
3679 pa_output_function_epilogue (file, size)
3681 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3683 int last_address = 0;
3684 rtx insn = get_last_insn ();
3686 /* hppa_expand_epilogue does the dirty work now.  We just need
3687 to output the assembler directives which denote the end
3690 To make debuggers happy, emit a nop if the epilogue was completely
3691 eliminated due to a volatile call as the last insn in the
3692 current function.  That way the return address (in %r2) will
3693 always point to a valid instruction in the current function.  */
3695 /* Get the last real insn.  */
3696 if (GET_CODE (insn) == NOTE)
3697 insn = prev_real_insn (insn);
3699 /* If it is a sequence, then look inside.  */
3700 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3701 insn = XVECEXP (PATTERN (insn), 0, 0);
3703 /* If insn is a CALL_INSN, then it must be a call to a volatile
3704 function (otherwise there would be epilogue insns).  */
3705 if (insn && GET_CODE (insn) == CALL_INSN)
3707 fputs ("\tnop\n", file);
3711 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3713 /* Finally, update the total number of code bytes output so far.  */
3714 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3715 && !flag_function_sections)
3717 if (INSN_ADDRESSES_SET_P ())
3719 unsigned long old_total = total_code_bytes;
/* Function size = address of last insn + its length, padded to the
   function alignment boundary.  */
3721 insn = get_last_nonnote_insn ();
3722 last_address += INSN_ADDRESSES (INSN_UID (insn));
3724 last_address += insn_default_length (insn);
3726 total_code_bytes += last_address;
3727 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3729 /* Be prepared to handle overflows.  */
3730 if (old_total > total_code_bytes)
3731 total_code_bytes = -1;
3734 total_code_bytes = -1;
/* Expand the function epilogue as RTL: restore the return pointer
   (%r2), the callee-saved general and FP registers, then cut the
   stack back.  Mirrors the prologue's save layout.  NOTE(review):
   several lines (braces, else arms, loop headers) are elided from
   this view.  */
3739 hppa_expand_epilogue ()
3743 int merge_sp_adjust_with_load = 0;
3746 /* We will use this often. */
3747 tmpreg = gen_rtx_REG (word_mode, 1);
3749 /* Try to restore RP early to avoid load/use interlocks when
3750 RP gets used in the return (bv) instruction. This appears to still
3751 be necessary even when we schedule the prologue and epilogue. */
3752 if (regs_ever_live [2] || current_function_calls_eh_return)
/* RP slot: -16(sp/fp) on 64-bit, -20 on 32-bit runtimes.  */
3754 ret_off = TARGET_64BIT ? -16 : -20;
3755 if (frame_pointer_needed)
3757 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3762 /* No frame pointer, and stack is smaller than 8k. */
3763 if (VAL_14_BITS_P (ret_off - actual_fsize))
3765 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3771 /* General register restores. */
3772 if (frame_pointer_needed)
3774 offset = local_fsize;
3776 /* If the current function calls __builtin_eh_return, then we need
3777 to restore the saved EH data registers. */
3778 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3780 unsigned int i, regno;
3784 regno = EH_RETURN_DATA_REGNO (i);
3785 if (regno == INVALID_REGNUM)
3788 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3789 offset += UNITS_PER_WORD;
/* Restore callee-saved GRs %r4..%r18 relative to the frame pointer.  */
3793 for (i = 18; i >= 4; i--)
3794 if (regs_ever_live[i] && ! call_used_regs[i])
3796 load_reg (i, offset, FRAME_POINTER_REGNUM);
3797 offset += UNITS_PER_WORD;
/* No frame pointer: offsets are relative to the stack pointer.  */
3802 offset = local_fsize - actual_fsize;
3804 /* If the current function calls __builtin_eh_return, then we need
3805 to restore the saved EH data registers. */
3806 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3808 unsigned int i, regno;
3812 regno = EH_RETURN_DATA_REGNO (i);
3813 if (regno == INVALID_REGNUM)
3816 /* Only for the first load.
3817 merge_sp_adjust_with_load holds the register load
3818 with which we will merge the sp adjustment. */
3819 if (merge_sp_adjust_with_load == 0
3821 && VAL_14_BITS_P (-actual_fsize))
3822 merge_sp_adjust_with_load = regno;
3824 load_reg (regno, offset, STACK_POINTER_REGNUM);
3825 offset += UNITS_PER_WORD;
3829 for (i = 18; i >= 3; i--)
3831 if (regs_ever_live[i] && ! call_used_regs[i])
3833 /* Only for the first load.
3834 merge_sp_adjust_with_load holds the register load
3835 with which we will merge the sp adjustment. */
3836 if (merge_sp_adjust_with_load == 0
3838 && VAL_14_BITS_P (-actual_fsize))
3839 merge_sp_adjust_with_load = i;
3841 load_reg (i, offset, STACK_POINTER_REGNUM);
3842 offset += UNITS_PER_WORD;
3847 /* Align pointer properly (doubleword boundary). */
3848 offset = (offset + 7) & ~7;
3850 /* FP register restores. */
3853 /* Adjust the register to index off of. */
3854 if (frame_pointer_needed)
3855 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3857 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3859 /* Actually do the restores now. */
/* Post-increment loads through %r1 (tmpreg) walk the FP save area.  */
3860 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3861 if (regs_ever_live[i]
3862 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3864 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3865 rtx dest = gen_rtx_REG (DFmode, i);
3866 emit_move_insn (dest, src);
3870 /* Emit a blockage insn here to keep these insns from being moved to
3871 an earlier spot in the epilogue, or into the main instruction stream.
3873 This is necessary as we must not cut the stack back before all the
3874 restores are finished. */
3875 emit_insn (gen_blockage ());
3877 /* Reset stack pointer (and possibly frame pointer). The stack
3878 pointer is initially set to fp + 64 to avoid a race condition. */
3879 if (frame_pointer_needed)
3881 rtx delta = GEN_INT (-64);
3883 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
/* gen_pre_load restores the old frame pointer while decrementing sp.  */
3884 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3886 /* If we were deferring a callee register restore, do it now. */
3887 else if (merge_sp_adjust_with_load)
3889 rtx delta = GEN_INT (-actual_fsize);
3890 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3892 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3894 else if (actual_fsize != 0)
3895 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3898 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3899 frame greater than 8k), do so now. */
3901 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3903 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3905 rtx sa = EH_RETURN_STACKADJ_RTX;
/* Apply the EH stack adjustment after all restores are done.  */
3907 emit_insn (gen_blockage ());
3908 emit_insn (TARGET_64BIT
3909 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3910 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return a pseudo holding the entry value of the PIC offset table
   register, created on first use via the initial-value machinery.  */
3915 hppa_pic_save_rtx ()
3917 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
/* Emit RTL calling the _mcount profiling routine for counter LABEL_NO.
   The function-begin label is referenced by the call pattern; unless
   NO_PROFILE_COUNTERS is defined, the counter label's address is
   passed in %r24.  */
3921 hppa_profile_hook (label_no)
3924 rtx begin_label_rtx, call_insn;
3925 char begin_label_name[16];
3927 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3929 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3932 emit_move_insn (arg_pointer_rtx,
3933 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
/* Copy the return pointer (%r2) into %r26, _mcount's first argument.  */
3936 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3938 #ifndef NO_PROFILE_COUNTERS
3940 rtx count_label_rtx, addr, r24;
3941 char count_label_name[16];
3943 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3944 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
3946 addr = force_reg (Pmode, count_label_rtx);
3947 r24 = gen_rtx_REG (Pmode, 24);
3948 emit_move_insn (r24, addr);
3950 /* %r25 is set from within the output pattern. */
3952 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3953 GEN_INT (TARGET_64BIT ? 24 : 12),
3956 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3959 /* %r25 is set from within the output pattern. */
3961 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3962 GEN_INT (TARGET_64BIT ? 16 : 8),
3966 /* Indicate the _mcount call cannot throw, nor will it execute a
3968 REG_NOTES (call_insn)
3969 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
/* Record registers the call implicitly uses so they stay live.  */
3973 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3975 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3977 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3981 /* Fetch the return address for the frame COUNT steps up from
3982 the current frame, after the prologue. FRAMEADDR is the
3983 frame pointer of the COUNT frame.
3985 We want to ignore any export stub remnants here. To handle this,
3986 we examine the code at the return address, and if it is an export
3987 stub, we return a memory rtx for the stub return address stored
3990 The value returned is used in two different ways:
3992 1. To find a function's caller.
3994 2. To change the return address for a function.
3996 This function handles most instances of case 1; however, it will
3997 fail if there are two levels of stubs to execute on the return
3998 path. The only way I believe that can happen is if the return value
3999 needs a parameter relocation, which never happens for C code.
4001 This function handles most instances of case 2; however, it will
4002 fail if we did not originally have stub code on the return path
4003 but will need stub code on the new return path. This can happen if
4004 the caller & callee are both in the main program, but the new
4005 return location is in a shared library. */
4008 return_addr_rtx (count, frameaddr)
4020 rp = get_hard_reg_initial_val (Pmode, 2);
/* 64-bit and no-space-register configurations have no export stubs
   to worry about (early-out path; body elided in this view).  */
4022 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4025 saved_rp = gen_reg_rtx (Pmode);
4026 emit_move_insn (saved_rp, rp);
4028 /* Get pointer to the instruction stream. We have to mask out the
4029 privilege level from the two low order bits of the return address
4030 pointer here so that ins will point to the start of the first
4031 instruction that would have been executed if we returned. */
4032 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4033 label = gen_label_rtx ();
4035 /* Check the instruction stream at the normal return address for the
4038 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4039 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4040 0x00011820 | stub+16: mtsp r1,sr0
4041 0xe0400002 | stub+20: be,n 0(sr0,rp)
4043 If it is an export stub, then our return address is really in
/* Compare each of the four stub words; any mismatch branches to
   LABEL, keeping the plain saved return pointer.  */
4046 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4047 NULL_RTX, SImode, 1);
4048 emit_jump_insn (gen_bne (label));
4050 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4051 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4052 emit_jump_insn (gen_bne (label));
4054 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4055 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4056 emit_jump_insn (gen_bne (label));
4058 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4059 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4061 /* If there is no export stub then just use the value saved from
4062 the return pointer register. */
4064 emit_jump_insn (gen_bne (label));
4066 /* Here we know that our return address points to an export
4067 stub. We don't want to return the address of the export stub,
4068 but rather the return address of the export stub. That return
4069 address is stored at -24[frameaddr]. */
4071 emit_move_insn (saved_rp,
4073 memory_address (Pmode,
4074 plus_constant (frameaddr,
4081 /* This is only valid once reload has completed because it depends on
4082 knowing exactly how much (if any) frame there is and...
4084 It's only valid if there is no frame marker to de-allocate and...
4086 It's only valid if %r2 hasn't been saved into the caller's frame
4087 (we're not profiling and %r2 isn't live anywhere). */
/* Return nonzero when the function needs no epilogue and a bare
   return insn can be used.  */
4089 hppa_can_use_return_insn_p ()
4091 return (reload_completed
4092 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4093 && ! regs_ever_live[2]
4094 && ! frame_pointer_needed);
/* Emit an RTL conditional jump to label OPERAND0, testing comparison
   CODE against the CCFPmode condition register (hard reg 0).  */
4098 emit_bcond_fp (code, operand0)
4102 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4103 gen_rtx_IF_THEN_ELSE (VOIDmode,
4104 gen_rtx_fmt_ee (code,
4106 gen_rtx_REG (CCFPmode, 0),
4108 gen_rtx_LABEL_REF (VOIDmode, operand0),
/* Build (without emitting) RTL that sets the CCFPmode condition
   register (hard reg 0) from comparison CODE of OPERAND0, OPERAND1.  */
4114 gen_cmp_fp (code, operand0, operand1)
4116 rtx operand0, operand1;
4118 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4119 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4122 /* Adjust the cost of a scheduling dependency. Return the new cost of
4123 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4126 pa_adjust_cost (insn, link, dep_insn, cost)
4132 enum attr_type attr_type;
4134 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4135 true dependencies as they are described with bypasses now. */
4136 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4139 if (! recog_memoized (insn))
4142 attr_type = get_attr_type (insn);
4144 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4146 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4149 if (attr_type == TYPE_FPLOAD)
4151 rtx pat = PATTERN (insn);
4152 rtx dep_pat = PATTERN (dep_insn);
4153 if (GET_CODE (pat) == PARALLEL)
4155 /* This happens for the fldXs,mb patterns. */
4156 pat = XVECEXP (pat, 0, 0);
4158 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4159 /* If this happens, we have to extend this to schedule
4160 optimally. Return 0 for now. */
4163 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4165 if (! recog_memoized (dep_insn))
4167 switch (get_attr_type (dep_insn))
4174 case TYPE_FPSQRTSGL:
4175 case TYPE_FPSQRTDBL:
4176 /* A fpload can't be issued until one cycle before a
4177 preceding arithmetic operation has finished if
4178 the target of the fpload is any of the sources
4179 (or destination) of the arithmetic operation. */
4180 return insn_default_latency (dep_insn) - 1;
4187 else if (attr_type == TYPE_FPALU)
4189 rtx pat = PATTERN (insn);
4190 rtx dep_pat = PATTERN (dep_insn);
4191 if (GET_CODE (pat) == PARALLEL)
4193 /* This happens for the fldXs,mb patterns. */
4194 pat = XVECEXP (pat, 0, 0);
4196 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4197 /* If this happens, we have to extend this to schedule
4198 optimally. Return 0 for now. */
4201 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4203 if (! recog_memoized (dep_insn))
4205 switch (get_attr_type (dep_insn))
4209 case TYPE_FPSQRTSGL:
4210 case TYPE_FPSQRTDBL:
4211 /* An ALU flop can't be issued until two cycles before a
4212 preceding divide or sqrt operation has finished if
4213 the target of the ALU flop is any of the sources
4214 (or destination) of the divide or sqrt operation. */
4215 return insn_default_latency (dep_insn) - 2;
4223 /* For other anti dependencies, the cost is 0. */
4226 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4228 /* Output dependency; DEP_INSN writes a register that INSN writes some
/* Same structure as the anti-dependency cases above, but testing
   SET_DEST of both patterns (write-after-write).  */
4230 if (attr_type == TYPE_FPLOAD)
4232 rtx pat = PATTERN (insn);
4233 rtx dep_pat = PATTERN (dep_insn);
4234 if (GET_CODE (pat) == PARALLEL)
4236 /* This happens for the fldXs,mb patterns. */
4237 pat = XVECEXP (pat, 0, 0);
4239 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4240 /* If this happens, we have to extend this to schedule
4241 optimally. Return 0 for now. */
4244 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4246 if (! recog_memoized (dep_insn))
4248 switch (get_attr_type (dep_insn))
4255 case TYPE_FPSQRTSGL:
4256 case TYPE_FPSQRTDBL:
4257 /* A fpload can't be issued until one cycle before a
4258 preceding arithmetic operation has finished if
4259 the target of the fpload is the destination of the
4260 arithmetic operation.
4262 Exception: For PA7100LC, PA7200 and PA7300, the cost
4263 is 3 cycles, unless they bundle together. We also
4264 pay the penalty if the second insn is a fpload. */
4265 return insn_default_latency (dep_insn) - 1;
4272 else if (attr_type == TYPE_FPALU)
4274 rtx pat = PATTERN (insn);
4275 rtx dep_pat = PATTERN (dep_insn);
4276 if (GET_CODE (pat) == PARALLEL)
4278 /* This happens for the fldXs,mb patterns. */
4279 pat = XVECEXP (pat, 0, 0);
4281 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4282 /* If this happens, we have to extend this to schedule
4283 optimally. Return 0 for now. */
4286 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4288 if (! recog_memoized (dep_insn))
4290 switch (get_attr_type (dep_insn))
4294 case TYPE_FPSQRTSGL:
4295 case TYPE_FPSQRTDBL:
4296 /* An ALU flop can't be issued until two cycles before a
4297 preceding divide or sqrt operation has finished if
4298 the target of the ALU flop is also the target of
4299 the divide or sqrt operation. */
4300 return insn_default_latency (dep_insn) - 2;
4308 /* For other output dependencies, the cost is 0. */
4315 /* Adjust scheduling priorities. We use this to try and keep addil
4316 and the next use of %r1 close together. */
4318 pa_adjust_priority (insn, priority)
4322 rtx set = single_set (insn);
4326 src = SET_SRC (set);
4327 dest = SET_DEST (set);
/* Three shapes are recognized: a LO_SUM of a writable symbolic
   operand, a load through such a LO_SUM address, and a store through
   one.  The priority adjustments applied in each arm are elided from
   this view.  */
4328 if (GET_CODE (src) == LO_SUM
4329 && symbolic_operand (XEXP (src, 1), VOIDmode)
4330 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4333 else if (GET_CODE (src) == MEM
4334 && GET_CODE (XEXP (src, 0)) == LO_SUM
4335 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4336 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4339 else if (GET_CODE (dest) == MEM
4340 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4341 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4342 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4348 /* The 700 can only issue a single insn at a time.
4349 The 7XXX processors can issue two insns at a time.
4350 The 8000 can issue 4 insns at a time. */
/* NOTE(review): the function header is elided in this view; this is
   the switch body of the scheduler issue-rate hook, keyed on the
   processor variant (presumably pa_issue_rate -- confirm).  */
4356 case PROCESSOR_700: return 1;
4357 case PROCESSOR_7100: return 2;
4358 case PROCESSOR_7100LC: return 2;
4359 case PROCESSOR_7200: return 2;
4360 case PROCESSOR_7300: return 2;
4361 case PROCESSOR_8000: return 4;
4370 /* Return any length adjustment needed by INSN which already has its length
4371 computed as LENGTH. Return zero if no adjustment is necessary.
4373 For the PA: function calls, millicode calls, and backwards short
4374 conditional branches with unfilled delay slots need an adjustment by +1
4375 (to account for the NOP which will be inserted into the instruction stream).
4377 Also compute the length of an inline block move here as it is too
4378 complicated to express as a length attribute in pa.md. */
4380 pa_adjust_insn_length (insn, length)
4384 rtx pat = PATTERN (insn);
4386 /* Call insns which are *not* indirect and have unfilled delay slots. */
4387 if (GET_CODE (insn) == CALL_INSN)
4390 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4391 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
4393 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4394 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4400 /* Jumps inside switch tables which have unfilled delay slots
4401 also need adjustment. */
4402 else if (GET_CODE (insn) == JUMP_INSN
4403 && simplejump_p (insn)
4404 && GET_MODE (insn) == SImode)
4406 /* Millicode insn with an unfilled delay slot. */
4407 else if (GET_CODE (insn) == INSN
4408 && GET_CODE (pat) != SEQUENCE
4409 && GET_CODE (pat) != USE
4410 && GET_CODE (pat) != CLOBBER
4411 && get_attr_type (insn) == TYPE_MILLI)
4413 /* Block move pattern. */
4414 else if (GET_CODE (insn) == INSN
4415 && GET_CODE (pat) == PARALLEL
4416 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4417 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4418 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4419 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4420 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
/* The adjustment is the full computed movstr length minus the 4
   bytes already counted for the insn itself.  */
4421 return compute_movstrsi_length (insn) - 4;
4422 /* Conditional branch with an unfilled delay slot. */
4423 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4425 /* Adjust a short backwards conditional with an unfilled delay slot. */
4426 if (GET_CODE (pat) == SET
4428 && ! forward_branch_p (insn))
4430 else if (GET_CODE (pat) == PARALLEL
4431 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4434 /* Adjust dbra insn with short backwards conditional branch with
4435 unfilled delay slot -- only for case where counter is in a
4436 general register. */
4437 else if (GET_CODE (pat) == PARALLEL
4438 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4439 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4440 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4442 && ! forward_branch_p (insn))
4450 /* Print operand X (an rtx) in assembler syntax to file FILE.
4451 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4452 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4455 print_operand (file, x, code)
/* NOTE(review): the switch on CODE and many case labels are elided in
   this view; the comments on each arm identify the modifier.  */
4463 /* Output a 'nop' if there's nothing for the delay slot. */
4464 if (dbr_sequence_length () == 0)
4465 fputs ("\n\tnop", file);
4468 /* Output a nullification completer if there's nothing for the */
4469 /* delay slot or nullification is requested. */
4470 if (dbr_sequence_length () == 0 ||
4472 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4476 /* Print out the second register name of a register pair.
4477 I.e., R (6) => 7. */
4478 fputs (reg_names[REGNO (x) + 1], file);
4481 /* A register or zero. */
4483 || (x == CONST0_RTX (DFmode))
4484 || (x == CONST0_RTX (SFmode)))
4486 fputs ("%r0", file);
4492 /* A register or zero (floating point). */
4494 || (x == CONST0_RTX (DFmode))
4495 || (x == CONST0_RTX (SFmode)))
4497 fputs ("%fr0", file);
/* Print a symbolic address as "symbol(basereg)".  */
4506 xoperands[0] = XEXP (XEXP (x, 0), 0);
4507 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4508 output_global_address (file, xoperands[1], 0);
4509 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4513 case 'C': /* Plain (C)ondition */
4515 switch (GET_CODE (x))
4518 fputs ("=", file); break;
4520 fputs ("<>", file); break;
4522 fputs (">", file); break;
4524 fputs (">=", file); break;
4526 fputs (">>=", file); break;
4528 fputs (">>", file); break;
4530 fputs ("<", file); break;
4532 fputs ("<=", file); break;
4534 fputs ("<<=", file); break;
4536 fputs ("<<", file); break;
4541 case 'N': /* Condition, (N)egated */
4542 switch (GET_CODE (x))
4545 fputs ("<>", file); break;
4547 fputs ("=", file); break;
4549 fputs ("<=", file); break;
4551 fputs ("<", file); break;
4553 fputs ("<<", file); break;
4555 fputs ("<<=", file); break;
4557 fputs (">=", file); break;
4559 fputs (">", file); break;
4561 fputs (">>", file); break;
4563 fputs (">>=", file); break;
4568 /* For floating point comparisons. Note that the output
4569 predicates are the complement of the desired mode. */
4571 switch (GET_CODE (x))
4574 fputs ("!=", file); break;
4576 fputs ("=", file); break;
4578 fputs ("!>", file); break;
4580 fputs ("!>=", file); break;
4582 fputs ("!<", file); break;
4584 fputs ("!<=", file); break;
4586 fputs ("!<>", file); break;
4588 fputs (">", file); break;
4590 fputs (">=", file); break;
4592 fputs ("<", file); break;
4594 fputs ("<=", file); break;
4596 fputs ("<>", file); break;
4598 fputs ("<=>", file); break;
4600 fputs ("!<=>", file); break;
4605 case 'S': /* Condition, operands are (S)wapped. */
4606 switch (GET_CODE (x))
4609 fputs ("=", file); break;
4611 fputs ("<>", file); break;
4613 fputs ("<", file); break;
4615 fputs ("<=", file); break;
4617 fputs ("<<=", file); break;
4619 fputs ("<<", file); break;
4621 fputs (">", file); break;
4623 fputs (">=", file); break;
4625 fputs (">>=", file); break;
4627 fputs (">>", file); break;
4632 case 'B': /* Condition, (B)oth swapped and negate. */
4633 switch (GET_CODE (x))
4636 fputs ("<>", file); break;
4638 fputs ("=", file); break;
4640 fputs (">=", file); break;
4642 fputs (">", file); break;
4644 fputs (">>", file); break;
4646 fputs (">>=", file); break;
4648 fputs ("<=", file); break;
4650 fputs ("<", file); break;
4652 fputs ("<<", file); break;
4654 fputs ("<<=", file); break;
/* Integer modifiers: bitwise complement, shift-count complements,
   log2, and bit-position complements of a CONST_INT operand.  */
4660 if (GET_CODE (x) == CONST_INT)
4662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4667 if (GET_CODE (x) == CONST_INT)
4669 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4674 if (GET_CODE (x) == CONST_INT)
4676 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4681 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4683 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4688 if (GET_CODE (x) == CONST_INT)
4690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4695 if (GET_CODE (x) == CONST_INT)
4697 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4702 if (GET_CODE (x) == CONST_INT)
/* Addressing-mode completers: pre/post modify and scaled index.  */
4707 switch (GET_CODE (XEXP (x, 0)))
4711 if (ASSEMBLER_DIALECT == 0)
4712 fputs ("s,mb", file);
4714 fputs (",mb", file);
4718 if (ASSEMBLER_DIALECT == 0)
4719 fputs ("s,ma", file);
4721 fputs (",ma", file);
4724 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4725 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4727 if (ASSEMBLER_DIALECT == 0)
4728 fputs ("x,s", file);
4732 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4736 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4742 output_global_address (file, x, 0);
4745 output_global_address (file, x, 1);
4747 case 0: /* Don't do anything special */
4752 compute_zdepwi_operands (INTVAL (x), op);
4753 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4759 compute_zdepdi_operands (INTVAL (x), op);
4760 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4764 /* We can get here from a .vtable_inherit due to our
4765 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Default operand output: register, memory reference, or constant.  */
4771 if (GET_CODE (x) == REG)
4773 fputs (reg_names [REGNO (x)], file);
4774 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4780 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4781 && (REGNO (x) & 1) == 0)
4784 else if (GET_CODE (x) == MEM)
4786 int size = GET_MODE_SIZE (GET_MODE (x));
4787 rtx base = NULL_RTX;
4788 switch (GET_CODE (XEXP (x, 0)))
4792 base = XEXP (XEXP (x, 0), 0);
4793 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4797 base = XEXP (XEXP (x, 0), 0);
4798 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4801 if (GET_CODE (XEXP (x, 0)) == PLUS
4802 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4803 fprintf (file, "%s(%s)",
4804 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4805 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4806 else if (GET_CODE (XEXP (x, 0)) == PLUS
4807 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4808 fprintf (file, "%s(%s)",
4809 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4810 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4812 output_address (XEXP (x, 0));
4817 output_addr_const (file, x);
4820 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.
   ROUND_CONSTANT nonzero requests rounding of the constant offset
   (see the LR field selector comment below).  */
4823 output_global_address (file, x, round_constant)
4829 /* Imagine (high (const (plus ...))). */
4830 if (GET_CODE (x) == HIGH)
4833 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4834 assemble_name (file, XSTR (x, 0));
4835 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
/* Non-PIC writable symbol: emitted as symbol-$global$.  */
4837 assemble_name (file, XSTR (x, 0));
4838 fputs ("-$global$", file);
4840 else if (GET_CODE (x) == CONST)
4842 const char *sep = "";
4843 int offset = 0; /* assembler wants -$global$ at end */
4844 rtx base = NULL_RTX;
/* Pick out the symbol and the integer offset from either side of
   the PLUS/MINUS inside the CONST.  */
4846 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4848 base = XEXP (XEXP (x, 0), 0);
4849 output_addr_const (file, base);
4851 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4852 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4855 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4857 base = XEXP (XEXP (x, 0), 1);
4858 output_addr_const (file, base);
4860 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4861 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4864 /* How bogus. The compiler is apparently responsible for
4865 rounding the constant if it uses an LR field selector.
4867 The linker and/or assembler seem a better place since
4868 they have to do this kind of thing already.
4870 If we fail to do this, HP's optimizing linker may eliminate
4871 an addil, but not update the ldw/stw/ldo instruction that
4872 uses the result of the addil. */
4874 offset = ((offset + 0x1000) & ~0x1fff);
4876 if (GET_CODE (XEXP (x, 0)) == PLUS)
4886 else if (GET_CODE (XEXP (x, 0)) == MINUS
4887 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4891 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4892 fputs ("-$global$", file);
4894 fprintf (file, "%s%d", sep, offset);
4897 output_addr_const (file, x);
/* Return the deferred-plabel entry for function FNAME, creating one
   (and growing the table) if it does not exist yet.  */
4900 static struct deferred_plabel *
4906 /* See if we have already put this function on the list of deferred
4907 plabels. This list is generally small, so a linear search is not
4908 too ugly. If it proves too slow replace it with something faster. */
4909 for (i = 0; i < n_deferred_plabels; i++)
4910 if (strcmp (fname, deferred_plabels[i].name) == 0)
4913 /* If the deferred plabel list is empty, or this entry was not found
4914 on the list, create a new entry on the list. */
4915 if (deferred_plabels == NULL || i == n_deferred_plabels)
4917 const char *real_name;
4919 if (deferred_plabels == 0)
4920 deferred_plabels = (struct deferred_plabel *)
4921 ggc_alloc (sizeof (struct deferred_plabel));
4923 deferred_plabels = (struct deferred_plabel *)
4924 ggc_realloc (deferred_plabels,
4925 ((n_deferred_plabels + 1)
4926 * sizeof (struct deferred_plabel)));
4928 i = n_deferred_plabels++;
4929 deferred_plabels[i].internal_label = gen_label_rtx ();
4930 deferred_plabels[i].name = ggc_strdup (fname);
4932 /* Gross. We have just implicitly taken the address of this function,
/* Mark the identifier referenced so the symbol isn't discarded.  */
4934 real_name = (*targetm.strip_name_encoding) (fname);
4935 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
4938 return &deferred_plabels[i];
/* At end of compilation, emit the accumulated plabel entries: one
   word (or doubleword for 64-bit) per deferred function pointer.  */
4942 output_deferred_plabels (file)
4946 /* If we have deferred plabels, then we need to switch into the data
4947 section and align it to a 4 byte boundary before we output the
4948 deferred plabels. */
4949 if (n_deferred_plabels)
4952 ASM_OUTPUT_ALIGN (file, TARGET_64BIT ? 3 : 2);
4955 /* Now output the deferred plabels. */
4956 for (i = 0; i < n_deferred_plabels; i++)
4958 (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4959 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4960 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4964 /* HP's millicode routines mean something special to the assembler.
4965 Keep track of which ones we have used. */
4967 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
4968 static void import_milli PARAMS ((enum millicodes));
4969 static char imported[(int) end1000];
4970 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
4971 static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* MILLI_START is the index of the "...." placeholder in import_string;
   the 4-character millicode name is spliced in there.  */
4972 #define MILLI_START 10
/* Output the .IMPORT directive for millicode routine CODE, at most
   once per compilation.  */
4976 enum millicodes code;
4978 char str[sizeof (import_string)];
4980 if (!imported[(int) code])
4982 imported[(int) code] = 1;
4983 strcpy (str, import_string);
4984 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4985 output_asm_insn (str, 0);
4989 /* The register constraints have put the operands and return value in
4990 the proper registers. */
/* Output a call to the $$mulI millicode multiply routine.  */
4993 output_mul_insn (unsignedp, insn)
4994 int unsignedp ATTRIBUTE_UNUSED;
4997 import_milli (mulI);
4998 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5001 /* Emit the rtl for doing a division by a constant. */
5003 /* Do magic division millicodes exist for this value? */
5004 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5007 /* We'll use an array to keep track of the magic millicodes and
5008 whether or not we've used them already. [n][0] is signed, [n][1] is
5011 static int div_milli[16][2];
/* Predicate: OP is a valid divisor for the millicode divide patterns,
   i.e. register %r25 or a positive constant < 16 for which a magic
   division millicode exists.  */
5014 div_operand (op, mode)
5016 enum machine_mode mode;
5018 return (mode == SImode
5019 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5020 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5021 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
/* If OPERANDS[2] is a constant with a magic division millicode, emit
   the RTL for the millicode divide (result in %r29, dividend in %r26)
   and return nonzero; the caller falls through otherwise.  */
5025 emit_hpdiv_const (operands, unsignedp)
5029 if (GET_CODE (operands[2]) == CONST_INT
5030 && INTVAL (operands[2]) > 0
5031 && INTVAL (operands[2]) < 16
5032 && magic_milli[INTVAL (operands[2])])
/* Millicode return pointer: %r2 on 64-bit, %r31 on 32-bit.  */
5034 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5036 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5039 (PARALLEL, VOIDmode,
5040 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5041 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5043 gen_rtx_REG (SImode, 26),
5045 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5046 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5047 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5048 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5049 gen_rtx_CLOBBER (VOIDmode, ret))));
5050 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Output a millicode call for a divide.  Constant divisors use the
   magic $$divI_n/$$divU_n routines; otherwise $$divI/$$divU.  */
5057 output_div_insn (operands, unsignedp, insn)
5064 /* If the divisor is a constant, try to use one of the special
5066 if (GET_CODE (operands[0]) == CONST_INT
5068 static char buf[100];
5069 divisor = INTVAL (operands[0]);
/* Emit the .IMPORT for this magic divisor only once.  */
5070 if (!div_milli[divisor][unsignedp])
5072 div_milli[divisor][unsignedp] = 1;
5074 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5076 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5080 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5081 INTVAL (operands[0]));
5082 return output_millicode_call (insn,
5083 gen_rtx_SYMBOL_REF (SImode, buf));
5087 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5088 INTVAL (operands[0]));
5089 return output_millicode_call (insn,
5090 gen_rtx_SYMBOL_REF (SImode, buf));
5093 /* Divisor isn't a special constant. */
5098 import_milli (divU);
5099 return output_millicode_call (insn,
5100 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5104 import_milli (divI);
5105 return output_millicode_call (insn,
5106 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5111 /* Output a $$rem millicode to do mod. */
/* UNSIGNEDP selects $$remU vs. $$remI; INSN is the call insn being
   output.  import_milli emits the .IMPORT for the routine (at most
   once -- presumably; the macro body is defined elsewhere).  */
5114 output_mod_insn (unsignedp, insn)
5120 import_milli (remU);
5121 return output_millicode_call (insn,
5122 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5126 import_milli (remI);
5127 return output_millicode_call (insn,
5128 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the ".CALL ARGWn=..." argument-relocation descriptor for
   CALL_INSN.  The descriptor records, for each of the four argument
   words, how the argument is passed: "GR" for a general register,
   "FR"/"FU" for the two halves of an FP register pair -- so the SOM
   linker can insert argument-relocation stubs when caller and callee
   disagree.  The register usage is discovered by scanning
   CALL_INSN_FUNCTION_USAGE for USEs of argument registers.  */
5133 output_arg_descriptor (call_insn)
5136 const char *arg_regs[4];
5137 enum machine_mode arg_mode;
5139 int i, output_flag = 0;
5142 /* We neither need nor want argument location descriptors for the
5143 64bit runtime environment or the ELF32 environment. */
5144 if (TARGET_64BIT || TARGET_ELF32)
5147 for (i = 0; i < 4; i++)
5150 /* Specify explicitly that no argument relocations should take place
5151 if using the portable runtime calling conventions. */
5152 if (TARGET_PORTABLE_RUNTIME)
5154 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5159 if (GET_CODE (call_insn) != CALL_INSN)
5161 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5163 rtx use = XEXP (link, 0);
/* Only USEs of hard argument registers are of interest.  */
5165 if (! (GET_CODE (use) == USE
5166 && GET_CODE (XEXP (use, 0)) == REG
5167 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5170 arg_mode = GET_MODE (XEXP (use, 0));
5171 regno = REGNO (XEXP (use, 0));
/* General argument registers %r23..%r26 map to argument words 0..3
   (word 0 is %r26); a DImode argument occupies two words.  */
5172 if (regno >= 23 && regno <= 26)
5174 arg_regs[26 - regno] = "GR";
5175 if (arg_mode == DImode)
5176 arg_regs[25 - regno] = "GR";
5178 else if (regno >= 32 && regno <= 39)
5180 if (arg_mode == SFmode)
5181 arg_regs[(regno - 32) / 2] = "FR";
/* Double-precision FP argument: the FR/FU order of the two halves
   depends on HP_FP_ARG_DESCRIPTOR_REVERSED.  */
5184 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5185 arg_regs[(regno - 34) / 2] = "FR";
5186 arg_regs[(regno - 34) / 2 + 1] = "FU";
5188 arg_regs[(regno - 34) / 2] = "FU";
5189 arg_regs[(regno - 34) / 2 + 1] = "FR";
5194 fputs ("\t.CALL ", asm_out_file);
5195 for (i = 0; i < 4; i++)
5200 fputc (',', asm_out_file);
5201 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5204 fputc ('\n', asm_out_file);
5207 /* Return the class of any secondary reload register that is needed to
5208 move IN into a register in class CLASS using mode MODE.
5210 Profiling has showed this routine and its descendants account for
5211 a significant amount of compile time (~7%). So it has been
5212 optimized to reduce redundant computations and eliminate useless
5215 It might be worthwhile to try and make this a leaf function too. */
5218 secondary_reload_class (class, mode, in)
5219 enum reg_class class;
5220 enum machine_mode mode;
5223 int regno, is_symbolic;
5225 /* Trying to load a constant into a FP register during PIC code
5226 generation will require %r1 as a scratch register. */
5228 && GET_MODE_CLASS (mode) == MODE_INT
5229 && FP_REG_CLASS_P (class)
5230 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5233 /* Profiling showed the PA port spends about 1.3% of its compilation
5234 time in true_regnum from calls inside secondary_reload_class. */
5236 if (GET_CODE (in) == REG)
5239 if (regno >= FIRST_PSEUDO_REGISTER)
5240 regno = true_regnum (in);
5242 else if (GET_CODE (in) == SUBREG)
5243 regno = true_regnum (in);
5247 /* If we have something like (mem (mem (...)), we can safely assume the
5248 inner MEM will end up in a general register after reloading, so there's
5249 no need for a secondary reload. */
5250 if (GET_CODE (in) == MEM
5251 && GET_CODE (XEXP (in, 0)) == MEM)
5254 /* Handle out of range displacement for integer mode loads/stores of
/* regno == -1 here marks "not a register at all" -- presumably set on
   a path not shown above; TODO confirm.  */
5256 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5257 && GET_MODE_CLASS (mode) == MODE_INT
5258 && FP_REG_CLASS_P (class))
5259 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5260 return GENERAL_REGS;
5262 /* A SAR<->FP register copy requires a secondary register (GPR) as
5263 well as secondary memory. */
5264 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5265 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5266 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5267 return GENERAL_REGS;
5269 if (GET_CODE (in) == HIGH)
5272 /* Profiling has showed GCC spends about 2.6% of its compilation
5273 time in symbolic_operand from calls inside secondary_reload_class.
5275 We use an inline copy and only compute its return value once to avoid
5277 switch (GET_CODE (in))
/* NOTE(review): tmp is presumably IN with a CONST wrapper stripped,
   assigned in a switch case not visible here -- verify.  */
5287 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5288 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5289 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5299 && read_only_operand (in, VOIDmode))
/* Symbolic operands need %r1 as a scratch, so any class other than
   R1_REGS requires a secondary reload through R1_REGS -- presumably the
   return on the (elided) next line; confirm.  */
5302 if (class != R1_REGS && is_symbolic)
/* Return the padding direction for an argument of MODE and TYPE.
   The 64-bit runtime left-justifies aggregates; the 32-bit runtime
   right-justifies small arguments.  The exact enum values returned
   are on lines not visible here -- TODO confirm against the full
   function.  */
5309 function_arg_padding (mode, type)
5310 enum machine_mode mode;
5314 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5316 /* Return none if justification is not required. */
5318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5319 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5322 /* The directions set here are ignored when a BLKmode argument larger
5323 than a word is placed in a register. Different code is used for
5324 the stack and registers. This makes it difficult to have a
5325 consistent data representation for both the stack and registers.
5326 For both runtimes, the justification and padding for arguments on
5327 the stack and in registers should be identical. */
5329 /* The 64-bit runtime specifies left justification for aggregates. */
5332 /* The 32-bit runtime architecture specifies right justification.
5333 When the argument is passed on the stack, the argument is padded
5334 with garbage on the left. The HP compiler pads with zeros. */
5338 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5345 /* Do what is necessary for `va_start'. We look at the current function
5346 to determine if stdargs or varargs is used and fill in an initial
5347 va_list. A pointer to this constructor is returned. */
5350 hppa_builtin_saveregs ()
5353 tree fntype = TREE_TYPE (current_function_decl);
/* argadj compensates for the last named argument: if the function's
   prototype does not end in void (i.e. it is old-style varargs rather
   than stdarg), back up one word.  */
5354 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5355 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5356 != void_type_node)))
5357 ? UNITS_PER_WORD : 0);
5360 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5362 offset = current_function_arg_offset_rtx;
/* NOTE(review): the branch below (negative argadj) appears to be the
   TARGET_64BIT path -- the guarding condition is on a line not shown;
   verify.  */
5368 /* Adjust for varargs/stdarg differences. */
5370 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5372 offset = current_function_arg_offset_rtx;
5374 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5375 from the incoming arg pointer and growing to larger addresses. */
5376 for (i = 26, off = -64; i >= 19; i--, off += 8)
5377 emit_move_insn (gen_rtx_MEM (word_mode,
5378 plus_constant (arg_pointer_rtx, off)),
5379 gen_rtx_REG (word_mode, i));
5381 /* The incoming args pointer points just beyond the flushback area;
5382 normally this is not a serious concern. However, when we are doing
5383 varargs/stdargs we want to make the arg pointer point to the start
5384 of the incoming argument area. */
5385 emit_move_insn (virtual_incoming_args_rtx,
5386 plus_constant (arg_pointer_rtx, -64));
5388 /* Now return a pointer to the first anonymous argument. */
5389 return copy_to_reg (expand_binop (Pmode, add_optab,
5390 virtual_incoming_args_rtx,
5391 offset, 0, 0, OPTAB_LIB_WIDEN));
/* 32-bit path: spill the general argument registers (starting at %r23,
   4 registers) to the stack so anonymous args can be read from memory.  */
5394 /* Store general registers on the stack. */
5395 dest = gen_rtx_MEM (BLKmode,
5396 plus_constant (current_function_internal_arg_pointer,
5398 set_mem_alias_set (dest, get_varargs_alias_set ());
5399 set_mem_align (dest, BITS_PER_WORD);
5400 move_block_from_reg (23, dest, 4);
5402 /* move_block_from_reg will emit code to store the argument registers
5403 individually as scalar stores.
5405 However, other insns may later load from the same addresses for
5406 a structure load (passing a struct to a varargs routine).
5408 The alias code assumes that such aliasing can never happen, so we
5409 have to keep memory referencing insns from moving up beyond the
5410 last argument register store. So we emit a blockage insn here. */
5411 emit_insn (gen_blockage ());
5413 return copy_to_reg (expand_binop (Pmode, add_optab,
5414 current_function_internal_arg_pointer,
5415 offset, 0, 0, OPTAB_LIB_WIDEN));
/* Implement va_start: flush the argument registers to memory via
   hppa_builtin_saveregs (through expand_builtin_saveregs), then perform
   the standard va_list initialization with the returned pointer.  */
5419 hppa_va_start (valist, nextarg)
5423 nextarg = expand_builtin_saveregs ();
5424 std_expand_builtin_va_start (valist, nextarg);
/* Implement va_arg for TYPE: build and expand trees that advance the
   va_list VALIST and produce the address of the fetched argument.
   The 64-bit path grows the arg pointer upward and passes zero- and
   variable-sized objects by reference; the 32-bit path grows downward
   and passes objects larger than 8 bytes (or of non-positive size) by
   reference.  */
5428 hppa_va_arg (valist, type)
5431 HOST_WIDE_INT size = int_size_in_bytes (type);
5437 /* Every argument in PA64 is supposed to be passed by value
5438 (including large structs). However, as a GCC extension, we
5439 pass zero and variable sized arguments by reference. Empty
5440 structures are a GCC extension not supported by the HP
5441 compilers. Thus, passing them by reference isn't likely
5442 to conflict with the ABI. For variable sized arguments,
5443 GCC doesn't have the infrastructure to allocate these to
5446 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5448 if (size > UNITS_PER_WORD)
/* Round VALIST up to a 2-word (16-byte) boundary in place.  */
5450 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5451 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5452 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5453 build_int_2 (-2 * UNITS_PER_WORD, -1));
5454 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5455 TREE_SIDE_EFFECTS (t) = 1;
5456 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* Ordinary by-value case: the generic expander handles it.  */
5460 return std_expand_builtin_va_arg (valist, type);
5463 ptr = build_pointer_type (type);
5465 /* Args grow upward. */
/* By-reference case: fetch the pointer stored in the arg slot, then
   bump VALIST past it.  */
5466 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5467 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5468 TREE_SIDE_EFFECTS (t) = 1;
5470 pptr = build_pointer_type (ptr);
5471 t = build1 (NOP_EXPR, pptr, t);
5472 TREE_SIDE_EFFECTS (t) = 1;
5474 t = build1 (INDIRECT_REF, ptr, t);
5475 TREE_SIDE_EFFECTS (t) = 1;
5478 else /* !TARGET_64BIT */
5480 ptr = build_pointer_type (type);
5482 /* "Large" and variable sized types are passed by reference. */
5483 if (size > 8 || size <= 0)
5485 /* Args grow downward. */
5486 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5487 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5488 TREE_SIDE_EFFECTS (t) = 1;
5490 pptr = build_pointer_type (ptr);
5491 t = build1 (NOP_EXPR, pptr, t);
5492 TREE_SIDE_EFFECTS (t) = 1;
5494 t = build1 (INDIRECT_REF, ptr, t);
5495 TREE_SIDE_EFFECTS (t) = 1;
/* Small by-value case: move VALIST down by SIZE, align it, and adjust
   for right-justification of sub-word arguments.  */
5499 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5500 build_int_2 (-size, -1));
5502 /* Copied from va-pa.h, but we probably don't need to align to
5503 word size, since we generate and preserve that invariant. */
5504 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5505 build_int_2 ((size > 4 ? -8 : -4), -1));
5507 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5508 TREE_SIDE_EFFECTS (t) = 1;
5510 ofs = (8 - size) % 4;
5513 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
5514 build_int_2 (ofs, 0));
5515 TREE_SIDE_EFFECTS (t) = 1;
5518 t = build1 (NOP_EXPR, ptr, t);
5519 TREE_SIDE_EFFECTS (t) = 1;
5524 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5529 /* This routine handles all the normal conditional branch sequences we
5530 might need to generate. It handles compare immediate vs compare
5531 register, nullification of delay slots, varying length branches,
5532 negated branches, and all combinations of the above. It returns the
5533 output appropriate to emit the branch corresponding to all given
/* OPERANDS: %0 branch target, %1/%2 comparison operands, %3 condition.
   NULLIFY is nonzero when the delay slot is annulled, LENGTH is the
   branch length attribute (4/8/longer -- presumably bytes), NEGATED
   inverts the condition, INSN is the jump insn itself.  buf is static
   because the template is returned to the caller.  */
5537 output_cbranch (operands, nullify, length, negated, insn)
5539 int nullify, length, negated;
5542 static char buf[100];
5546 /* A conditional branch to the following instruction (eg the delay slot)
5547 is asking for a disaster. This can happen when not optimizing and
5548 when jump optimization fails.
5550 While it is usually safe to emit nothing, this can fail if the
5551 preceding instruction is a nullified branch with an empty delay
5552 slot and the same branch target as this branch. We could check
5553 for this but jump optimization should eliminate nop jumps. It
5554 is always safe to emit a nop. */
5555 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5558 /* If this is a long branch with its delay slot unfilled, set `nullify'
5559 as it can nullify the delay slot and save a nop. */
5560 if (length == 8 && dbr_sequence_length () == 0)
5563 /* If this is a short forward conditional branch which did not get
5564 its delay slot filled, the delay slot can still be nullified. */
5565 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5566 nullify = forward_branch_p (insn)
5568 /* A forward branch over a single nullified insn can be done with a
5569 comclr instruction. This avoids a single cycle penalty due to
5570 mis-predicted branch if we fall through (branch not taken). */
5572 && next_real_insn (insn) != 0
5573 && get_attr_length (next_real_insn (insn)) == 4
5574 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5580 /* All short conditional branches except backwards with an unfilled
5584 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5586 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5587 if (GET_MODE (operands[1]) == DImode)
5590 strcat (buf, "%B3");
5592 strcat (buf, "%S3");
5594 strcat (buf, " %2,%r1,%%r0");
5596 strcat (buf, ",n %2,%r1,%0");
5598 strcat (buf, " %2,%r1,%0");
5601 /* All long conditionals. Note a short backward branch with an
5602 unfilled delay slot is treated just like a long backward branch
5603 with an unfilled delay slot. */
5605 /* Handle weird backwards branch with a filled delay slot
5606 with is nullified. */
5607 if (dbr_sequence_length () != 0
5608 && ! forward_branch_p (insn)
5611 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5612 if (GET_MODE (operands[1]) == DImode)
5615 strcat (buf, "%S3");
5617 strcat (buf, "%B3");
5618 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5620 /* Handle short backwards branch with an unfilled delay slot.
5621 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5622 taken and untaken branches. */
5623 else if (dbr_sequence_length () == 0
5624 && ! forward_branch_p (insn)
5625 && INSN_ADDRESSES_SET_P ()
5626 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5627 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5629 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5630 if (GET_MODE (operands[1]) == DImode)
5633 strcat (buf, "%B3 %2,%r1,%0%#");
5635 strcat (buf, "%S3 %2,%r1,%0%#");
5639 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5640 if (GET_MODE (operands[1]) == DImode)
5643 strcat (buf, "%S3");
5645 strcat (buf, "%B3");
5647 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5649 strcat (buf, " %2,%r1,%%r0\n\tb %0");
/* Very long branch: emit a reversed conditional skip around an
   unconditional long branch produced by output_lbranch.  */
5655 xoperands[0] = operands[0];
5656 xoperands[1] = operands[1];
5657 xoperands[2] = operands[2];
5658 xoperands[3] = operands[3];
5660 /* The reversed conditional branch must branch over one additional
5661 instruction if the delay slot is filled. If the delay slot
5662 is empty, the instruction after the reversed condition branch
5663 must be nullified. */
5664 nullify = dbr_sequence_length () == 0;
5665 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
5667 /* Create a reversed conditional branch which branches around
5668 the following insns. */
5669 if (GET_MODE (operands[1]) != DImode)
5675 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
5678 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
5684 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
5687 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
5696 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
5699 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
5705 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
5708 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
5712 output_asm_insn (buf, xoperands);
5713 return output_lbranch (operands[0], insn);
5721 /* This routine handles long unconditional branches that exceed the
5722 maximum range of a simple branch instruction. */
/* DEST is the branch target label, INSN the jump insn.  Any insn in the
   delay slot is output first and then deleted; %r1 is spilled to a slot
   in the frame marker, used to form the target address, and the
   returned template restores %r1 from the branch's delay slot.  */
5725 output_lbranch (dest, insn)
5730 xoperands[0] = dest;
5732 /* First, free up the delay slot. */
5733 if (dbr_sequence_length () != 0)
5735 /* We can't handle a jump in the delay slot. */
5736 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
5739 final_scan_insn (NEXT_INSN (insn), asm_out_file,
5742 /* Now delete the delay insn. */
5743 PUT_CODE (NEXT_INSN (insn), NOTE);
5744 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5745 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5748 /* Output an insn to save %r1. The runtime documentation doesn't
5749 specify whether the "Clean Up" slot in the callers frame can
5750 be clobbered by the callee. It isn't copied by HP's builtin
5751 alloca, so this suggests that it can be clobbered if necessary.
5752 The "Static Link" location is copied by HP builtin alloca, so
5753 we avoid using it. Using the cleanup slot might be a problem
5754 if we have to interoperate with languages that pass cleanup
5755 information. However, it should be possible to handle these
5756 situations with GCC's asm feature.
5758 The "Current RP" slot is reserved for the called procedure, so
5759 we try to use it when we don't have a frame of our own. It's
5760 rather unlikely that we won't have a frame when we need to emit
5763 Really the way to go long term is a register scavenger; goto
5764 the target of the jump and find a register which we can use
5765 as a scratch to hold the value in %r1. Then, we wouldn't have
5766 to free up the delay slot or clobber a slot that may be needed
5767 for other purposes. */
/* NOTE(review): this first pair of stores is presumably the
   TARGET_64BIT path (std, 8-byte slots); the guard is on a line not
   shown.  */
5770 if (actual_fsize == 0 && !regs_ever_live[2])
5771 /* Use the return pointer slot in the frame marker. */
5772 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
5774 /* Use the slot at -40 in the frame marker since HP builtin
5775 alloca doesn't copy it. */
5776 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
5780 if (actual_fsize == 0 && !regs_ever_live[2])
5781 /* Use the return pointer slot in the frame marker. */
5782 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
5784 /* Use the "Clean Up" slot in the frame marker. In GCC,
5785 the only other use of this location is for copying a
5786 floating point double argument from a floating-point
5787 register to two general registers. The copy is done
5788 as an "atomic" operation when outputing a call, so it
5789 won't interfere with our using the location here. */
5790 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
/* PIC path: compute the target address pc-relatively into %r1 and
   branch through it.  */
5795 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5796 if (TARGET_SOM || !TARGET_GAS)
5798 xoperands[1] = gen_label_rtx ();
5799 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
5800 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5801 CODE_LABEL_NUMBER (xoperands[1]));
5802 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
5806 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5807 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
5809 output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* Non-PIC path: absolute long branch via ldil/be.  */
5812 /* Now output a very long branch to the original target. */
5813 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
5815 /* Now restore the value of %r1 in the delay slot. */
5818 if (actual_fsize == 0 && !regs_ever_live[2])
5819 return "ldd -16(%%r30),%%r1";
5821 return "ldd -40(%%r30),%%r1";
5825 if (actual_fsize == 0 && !regs_ever_live[2])
5826 return "ldw -20(%%r30),%%r1";
5828 return "ldw -12(%%r30),%%r1";
5832 /* This routine handles all the branch-on-bit conditional branch sequences we
5833 might need to generate. It handles nullification of delay slots,
5834 varying length branches, negated branches and all combinations of the
5835 above. it returns the appropriate output template to emit the branch. */
/* WHICH selects which of the insn's two targets (%2 or %3) is the
   branch destination; combined with NEGATED it also flips the tested
   bit sense ("<" vs. ">=" appended on elided lines -- presumably).
   buf is static because the template is returned.  */
5838 output_bb (operands, nullify, length, negated, insn, which)
5839 rtx *operands ATTRIBUTE_UNUSED;
5840 int nullify, length, negated;
5844 static char buf[100];
5847 /* A conditional branch to the following instruction (eg the delay slot) is
5848 asking for a disaster. I do not think this can happen as this pattern
5849 is only used when optimizing; jump optimization should eliminate the
5850 jump. But be prepared just in case. */
5852 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5855 /* If this is a long branch with its delay slot unfilled, set `nullify'
5856 as it can nullify the delay slot and save a nop. */
5857 if (length == 8 && dbr_sequence_length () == 0)
5860 /* If this is a short forward conditional branch which did not get
5861 its delay slot filled, the delay slot can still be nullified. */
5862 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5863 nullify = forward_branch_p (insn);
5865 /* A forward branch over a single nullified insn can be done with a
5866 extrs instruction. This avoids a single cycle penalty due to
5867 mis-predicted branch if we fall through (branch not taken). */
5870 && next_real_insn (insn) != 0
5871 && get_attr_length (next_real_insn (insn)) == 4
5872 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5879 /* All short conditional branches except backwards with an unfilled
5883 strcpy (buf, "{extrs,|extrw,s,}");
5885 strcpy (buf, "bb,");
5886 if (useskip && GET_MODE (operands[0]) == DImode)
5887 strcpy (buf, "extrd,s,*");
5888 else if (GET_MODE (operands[0]) == DImode)
5889 strcpy (buf, "bb,*");
5890 if ((which == 0 && negated)
5891 || (which == 1 && ! negated))
5896 strcat (buf, " %0,%1,1,%%r0");
5897 else if (nullify && negated)
5898 strcat (buf, ",n %0,%1,%3");
5899 else if (nullify && ! negated)
5900 strcat (buf, ",n %0,%1,%2");
5901 else if (! nullify && negated)
5902 strcat (buf, "%0,%1,%3");
5903 else if (! nullify && ! negated)
5904 strcat (buf, " %0,%1,%2");
5907 /* All long conditionals. Note a short backward branch with an
5908 unfilled delay slot is treated just like a long backward branch
5909 with an unfilled delay slot. */
5911 /* Handle weird backwards branch with a filled delay slot
5912 with is nullified. */
5913 if (dbr_sequence_length () != 0
5914 && ! forward_branch_p (insn)
5917 strcpy (buf, "bb,");
5918 if (GET_MODE (operands[0]) == DImode)
5920 if ((which == 0 && negated)
5921 || (which == 1 && ! negated))
5926 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5928 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5930 /* Handle short backwards branch with an unfilled delay slot.
5931 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5932 taken and untaken branches. */
5933 else if (dbr_sequence_length () == 0
5934 && ! forward_branch_p (insn)
5935 && INSN_ADDRESSES_SET_P ()
5936 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5937 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5939 strcpy (buf, "bb,");
5940 if (GET_MODE (operands[0]) == DImode)
5942 if ((which == 0 && negated)
5943 || (which == 1 && ! negated))
5948 strcat (buf, " %0,%1,%3%#");
5950 strcat (buf, " %0,%1,%2%#");
5954 strcpy (buf, "{extrs,|extrw,s,}");
5955 if (GET_MODE (operands[0]) == DImode)
5956 strcpy (buf, "extrd,s,*");
5957 if ((which == 0 && negated)
5958 || (which == 1 && ! negated))
5962 if (nullify && negated)
5963 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5964 else if (nullify && ! negated)
5965 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5967 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5969 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5979 /* This routine handles all the branch-on-variable-bit conditional branch
5980 sequences we might need to generate. It handles nullification of delay
5981 slots, varying length branches, negated branches and all combinations
5982 of the above. it returns the appropriate output template to emit the
/* Like output_bb, but the bit position comes from %sar rather than an
   immediate.  WHICH selects the branch target (%2 or %3) and, with
   NEGATED, the tested bit sense.  buf is static because the template
   is returned.  */
5986 output_bvb (operands, nullify, length, negated, insn, which)
5987 rtx *operands ATTRIBUTE_UNUSED;
5988 int nullify, length, negated;
5992 static char buf[100];
5995 /* A conditional branch to the following instruction (eg the delay slot) is
5996 asking for a disaster. I do not think this can happen as this pattern
5997 is only used when optimizing; jump optimization should eliminate the
5998 jump. But be prepared just in case. */
6000 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6003 /* If this is a long branch with its delay slot unfilled, set `nullify'
6004 as it can nullify the delay slot and save a nop. */
6005 if (length == 8 && dbr_sequence_length () == 0)
6008 /* If this is a short forward conditional branch which did not get
6009 its delay slot filled, the delay slot can still be nullified. */
6010 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6011 nullify = forward_branch_p (insn);
6013 /* A forward branch over a single nullified insn can be done with a
6014 extrs instruction. This avoids a single cycle penalty due to
6015 mis-predicted branch if we fall through (branch not taken). */
6018 && next_real_insn (insn) != 0
6019 && get_attr_length (next_real_insn (insn)) == 4
6020 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6027 /* All short conditional branches except backwards with an unfilled
6031 strcpy (buf, "{vextrs,|extrw,s,}");
6033 strcpy (buf, "{bvb,|bb,}");
/* NOTE(review): the "}" inside "extrd,s,*}" below looks like a stray
   character (compare output_bb's "extrd,s,*") -- worth verifying
   against upstream before touching, since it only affects the
   useskip DImode path.  */
6034 if (useskip && GET_MODE (operands[0]) == DImode)
6035 strcpy (buf, "extrd,s,*}");
6036 else if (GET_MODE (operands[0]) == DImode)
6037 strcpy (buf, "bb,*");
6038 if ((which == 0 && negated)
6039 || (which == 1 && ! negated))
6044 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6045 else if (nullify && negated)
6046 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6047 else if (nullify && ! negated)
6048 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6049 else if (! nullify && negated)
6050 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6051 else if (! nullify && ! negated)
6052 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6055 /* All long conditionals. Note a short backward branch with an
6056 unfilled delay slot is treated just like a long backward branch
6057 with an unfilled delay slot. */
6059 /* Handle weird backwards branch with a filled delay slot
6060 with is nullified. */
6061 if (dbr_sequence_length () != 0
6062 && ! forward_branch_p (insn)
6065 strcpy (buf, "{bvb,|bb,}");
6066 if (GET_MODE (operands[0]) == DImode)
6068 if ((which == 0 && negated)
6069 || (which == 1 && ! negated))
6074 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6076 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6078 /* Handle short backwards branch with an unfilled delay slot.
6079 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6080 taken and untaken branches. */
6081 else if (dbr_sequence_length () == 0
6082 && ! forward_branch_p (insn)
6083 && INSN_ADDRESSES_SET_P ()
6084 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6085 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6087 strcpy (buf, "{bvb,|bb,}");
6088 if (GET_MODE (operands[0]) == DImode)
6090 if ((which == 0 && negated)
6091 || (which == 1 && ! negated))
6096 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6098 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6102 strcpy (buf, "{vextrs,|extrw,s,}");
6103 if (GET_MODE (operands[0]) == DImode)
6104 strcpy (buf, "extrd,s,*");
6105 if ((which == 0 && negated)
6106 || (which == 1 && ! negated))
6110 if (nullify && negated)
6111 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6112 else if (nullify && ! negated)
6113 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6115 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6117 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6127 /* Return the output template for emitting a dbra type insn.
6129 Note it may perform some output operations on its own before
6130 returning the final output string. */
/* Decrement-and-branch.  WHICH_ALTERNATIVE: 0 = counter in a GR
   (addib), 1 = counter in an FP register (bounce through the -16(%r30)
   frame-marker slot), otherwise counter in memory (reload into %4).  */
6132 output_dbra (operands, insn, which_alternative)
6135 int which_alternative;
6138 /* A conditional branch to the following instruction (eg the delay slot) is
6139 asking for a disaster. Be prepared! */
/* Degenerate branch-to-next-insn: just perform the increment without
   branching.  */
6141 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6143 if (which_alternative == 0)
6144 return "ldo %1(%0),%0";
6145 else if (which_alternative == 1)
6147 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6148 output_asm_insn ("ldw -16(%%r30),%4", operands);
6149 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6150 return "{fldws|fldw} -16(%%r30),%0";
6154 output_asm_insn ("ldw %0,%4", operands);
6155 return "ldo %1(%4),%4\n\tstw %4,%0";
6159 if (which_alternative == 0)
6161 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6162 int length = get_attr_length (insn);
6164 /* If this is a long branch with its delay slot unfilled, set `nullify'
6165 as it can nullify the delay slot and save a nop. */
6166 if (length == 8 && dbr_sequence_length () == 0)
6169 /* If this is a short forward conditional branch which did not get
6170 its delay slot filled, the delay slot can still be nullified. */
6171 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6172 nullify = forward_branch_p (insn);
6174 /* Handle short versions first. */
6175 if (length == 4 && nullify)
6176 return "addib,%C2,n %1,%0,%3";
6177 else if (length == 4 && ! nullify)
6178 return "addib,%C2 %1,%0,%3";
6179 else if (length == 8)
6181 /* Handle weird backwards branch with a fulled delay slot
6182 which is nullified. */
6183 if (dbr_sequence_length () != 0
6184 && ! forward_branch_p (insn)
6186 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6187 /* Handle short backwards branch with an unfilled delay slot.
6188 Using a addb;nop rather than addi;bl saves 1 cycle for both
6189 taken and untaken branches. */
6190 else if (dbr_sequence_length () == 0
6191 && ! forward_branch_p (insn)
6192 && INSN_ADDRESSES_SET_P ()
6193 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6194 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6195 return "addib,%C2 %1,%0,%3%#";
6197 /* Handle normal cases. */
6199 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6201 return "addi,%N2 %1,%0,%0\n\tb %3";
6206 /* Deal with gross reload from FP register case. */
6207 else if (which_alternative == 1)
6209 /* Move loop counter from FP register to MEM then into a GR,
6210 increment the GR, store the GR into MEM, and finally reload
6211 the FP register from MEM from within the branch's delay slot. */
6212 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6214 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6215 if (get_attr_length (insn) == 24)
6216 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6218 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6220 /* Deal with gross reload from memory case. */
6223 /* Reload loop counter from memory, the store back to memory
6224 happens in the branch's delay slot. */
6225 output_asm_insn ("ldw %0,%4", operands);
6226 if (get_attr_length (insn) == 12)
6227 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6229 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6233 /* Return the output template for emitting a dbra type insn.
6235 Note it may perform some output operations on its own before
6236 returning the final output string. */
/* Emit the assembler template for a movb-style conditional move+branch.
   OPERANDS come from the insn pattern, INSN is the branch insn,
   WHICH_ALTERNATIVE selects GR / FP-reload / MEM-reload / SAR cases, and
   REVERSE_COMPARISON requests reversing the condition in operands[2].
   NOTE(review): this listing has elided lines (jumps in the embedded
   numbering) — braces and some declarations are missing below; the
   visible tokens are preserved byte-identical.  */
6238 output_movb (operands, insn, which_alternative, reverse_comparison)
6241 int which_alternative;
6242 int reverse_comparison;
6245 /* A conditional branch to the following instruction (eg the delay slot) is
6246 asking for a disaster. Be prepared! */
6248 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
/* Branch targets the next real insn: emit only the move, no branch.  */
6250 if (which_alternative == 0)
6251 return "copy %1,%0";
6252 else if (which_alternative == 1)
6254 output_asm_insn ("stw %1,-16(%%r30)", operands);
6255 return "{fldws|fldw} -16(%%r30),%0";
6257 else if (which_alternative == 2)
6263 /* Support the second variant. */
6264 if (reverse_comparison)
6265 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6267 if (which_alternative == 0)
6269 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6270 int length = get_attr_length (insn);
6272 /* If this is a long branch with its delay slot unfilled, set `nullify'
6273 as it can nullify the delay slot and save a nop. */
6274 if (length == 8 && dbr_sequence_length () == 0)
6277 /* If this is a short forward conditional branch which did not get
6278 its delay slot filled, the delay slot can still be nullified. */
6279 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6280 nullify = forward_branch_p (insn);
6282 /* Handle short versions first. */
6283 if (length == 4 && nullify)
6284 return "movb,%C2,n %1,%0,%3";
6285 else if (length == 4 && ! nullify)
6286 return "movb,%C2 %1,%0,%3";
6287 else if (length == 8)
6289 /* Handle weird backwards branch with a filled delay slot
6290 which is nullified. */
6291 if (dbr_sequence_length () != 0
6292 && ! forward_branch_p (insn)
6294 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6296 /* Handle short backwards branch with an unfilled delay slot.
6297 Using a movb;nop rather than or;bl saves 1 cycle for both
6298 taken and untaken branches. */
6299 else if (dbr_sequence_length () == 0
6300 && ! forward_branch_p (insn)
6301 && INSN_ADDRESSES_SET_P ()
6302 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6303 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6304 return "movb,%C2 %1,%0,%3%#";
6305 /* Handle normal cases. */
6307 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6309 return "or,%N2 %1,%%r0,%0\n\tb %3";
6314 /* Deal with gross reload from FP register case. */
6315 else if (which_alternative == 1)
6317 /* Move loop counter from FP register to MEM then into a GR,
6318 increment the GR, store the GR into MEM, and finally reload
6319 the FP register from MEM from within the branch's delay slot. */
6320 output_asm_insn ("stw %1,-16(%%r30)", operands);
6321 if (get_attr_length (insn) == 12)
6322 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6324 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6326 /* Deal with gross reload from memory case. */
6327 else if (which_alternative == 2)
6329 /* Reload loop counter from memory, the store back to memory
6330 happens in the branch's delay slot. */
6331 if (get_attr_length (insn) == 8)
6332 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6334 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6336 /* Handle SAR as a destination. */
6339 if (get_attr_length (insn) == 8)
6340 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6342 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6346 /* Copy any FP arguments in INSN into integer registers. */
/* NOTE(review): the function header line itself is elided from this
   listing; only the loop body is visible.  Walks the USE list of the
   call insn and, for each FP argument register (32..39), stores it to
   the stack slot at -16(%sr0,%r30) and reloads it into the matching
   general register (the integer arg-register mapping is computed from
   REGNO below).  */
6354 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6356 int arg_mode, regno;
6357 rtx use = XEXP (link, 0);
6359 if (! (GET_CODE (use) == USE
6360 && GET_CODE (XEXP (use, 0)) == REG
6361 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6364 arg_mode = GET_MODE (XEXP (use, 0));
6365 regno = REGNO (XEXP (use, 0));
6367 /* Is it a floating point register? */
6368 if (regno >= 32 && regno <= 39)
6370 /* Copy the FP register into an integer register via memory. */
6371 if (arg_mode == SFmode)
6373 xoperands[0] = XEXP (use, 0);
6374 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6375 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6376 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* Double-word case: reload both halves of the DImode pair.  */
6380 xoperands[0] = XEXP (use, 0);
6381 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6382 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6383 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6384 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6390 /* Compute length of the FP argument copy sequence for INSN. */
/* NOTE(review): must stay in sync with copy_fp_args above — it walks
   the same USE list and counts the bytes that sequence will emit.
   Lines are elided in this listing (the per-mode length accounting and
   the return are missing).  */
6392 length_fp_args (insn)
6398 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6400 int arg_mode, regno;
6401 rtx use = XEXP (link, 0);
6403 if (! (GET_CODE (use) == USE
6404 && GET_CODE (XEXP (use, 0)) == REG
6405 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6408 arg_mode = GET_MODE (XEXP (use, 0));
6409 regno = REGNO (XEXP (use, 0));
6411 /* Is it a floating point register? */
6412 if (regno >= 32 && regno <= 39)
6414 if (arg_mode == SFmode)
6424 /* Return the attribute length for the millicode call instruction INSN.
6425 The length must match the code generated by output_millicode_call.
6426 We include the delay slot in the returned length as it is better to
6427 over estimate the length than to under estimate it. */
/* NOTE(review): `distance' is computed against total_code_bytes and
   only trusted when it does not wrap; the returned byte counts for
   each branch of the TARGET_* cases are on lines elided from this
   listing.  */
6430 attr_length_millicode_call (insn)
6433 unsigned long distance = -1;
6435 if (INSN_ADDRESSES_SET_P ())
6437 distance = (total_code_bytes + insn_current_reference_address (insn));
6438 if (distance < total_code_bytes)
6444 if (!TARGET_LONG_CALLS && distance < 7600000)
6449 else if (TARGET_PORTABLE_RUNTIME)
6453 if (!TARGET_LONG_CALLS && distance < 240000)
6456 if (TARGET_LONG_ABS_CALL && !flag_pic)
6463 /* INSN is a function call. It may have an unconditional jump
6466 CALL_DEST is the routine we are calling. */
/* Emits the assembler sequence for a millicode call ($$-routines).
   Selects among short bl, 64-bit pc-relative, portable-runtime,
   absolute be,l/ble and pc-relative long sequences, then handles a
   jump sitting in the call's delay slot (adjusting the return address
   when the jump target is within 14-bit reach, else emitting an
   explicit branch and deleting the jump).  NOTE(review): lines are
   elided in this listing; tokens below are kept byte-identical.  */
6469 output_millicode_call (insn, call_dest)
6473 int attr_length = get_attr_length (insn);
6474 int seq_length = dbr_sequence_length ();
6479 xoperands[0] = call_dest;
6480 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6482 /* Handle the common case where we are sure that the branch will
6483 reach the beginning of the $CODE$ subspace. The within reach
6484 form of the $$sh_func_adrs call has a length of 28. Because
6485 it has an attribute type of multi, it never has a nonzero
6486 sequence length. The length of the $$sh_func_adrs is the same
6487 as certain out of reach PIC calls to other routines. */
6488 if (!TARGET_LONG_CALLS
6489 && ((seq_length == 0
6490 && (attr_length == 12
6491 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6492 || (seq_length != 0 && attr_length == 8)))
6494 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6500 /* It might seem that one insn could be saved by accessing
6501 the millicode function using the linkage table. However,
6502 this doesn't work in shared libraries and other dynamically
6503 loaded objects. Using a pc-relative sequence also avoids
6504 problems related to the implicit use of the gp register. */
6505 output_asm_insn ("b,l .+8,%%r1", xoperands);
6509 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6510 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6514 xoperands[1] = gen_label_rtx ();
6515 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6516 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6517 CODE_LABEL_NUMBER (xoperands[1]));
6518 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6521 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6523 else if (TARGET_PORTABLE_RUNTIME)
6525 /* Pure portable runtime doesn't allow be/ble; we also don't
6526 have PIC support in the assembler/linker, so this sequence
6529 /* Get the address of our target into %r1. */
6530 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6531 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6533 /* Get our return address into %r31. */
6534 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6535 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6537 /* Jump to our target address in %r1. */
6538 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6542 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6544 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6546 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6550 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6551 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6553 if (TARGET_SOM || !TARGET_GAS)
6555 /* The HP assembler can generate relocations for the
6556 difference of two symbols. GAS can do this for a
6557 millicode symbol but not an arbitrary external
6558 symbol when generating SOM output. */
6559 xoperands[1] = gen_label_rtx ();
6560 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6561 CODE_LABEL_NUMBER (xoperands[1]));
6562 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6563 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6567 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6568 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6572 /* Jump to our target address in %r1. */
6573 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6577 if (seq_length == 0)
6578 output_asm_insn ("nop", xoperands);
6580 /* We are done if there isn't a jump in the delay slot. */
6581 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6584 /* This call has an unconditional jump in its delay slot. */
6585 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6587 /* See if the return address can be adjusted. Use the containing
6588 sequence insn's address. */
6589 if (INSN_ADDRESSES_SET_P ())
6591 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6592 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6593 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6595 if (VAL_14_BITS_P (distance))
6597 xoperands[1] = gen_label_rtx ();
6598 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6599 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6600 CODE_LABEL_NUMBER (xoperands[1]));
6603 /* ??? This branch may not reach its target. */
6604 output_asm_insn ("nop\n\tb,n %0", xoperands);
6607 /* ??? This branch may not reach its target. */
6608 output_asm_insn ("nop\n\tb,n %0", xoperands);
6610 /* Delete the jump. */
6611 PUT_CODE (NEXT_INSN (insn), NOTE);
6612 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6613 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6618 /* Return the attribute length of the call instruction INSN. The SIBCALL
6619 flag indicates whether INSN is a regular call or a sibling call. The
6620 length must match the code generated by output_call. We include the delay
6621 slot in the returned length as it is better to over estimate the length
6622 than to under estimate it. */
/* NOTE(review): several branches and the base-length accounting are on
   lines elided from this listing; the visible returns are the 64-bit
   indirect case and the long plabel cases (which add 32 bytes plus any
   FP argument copy sequence).  */
6625 attr_length_call (insn, sibcall)
6629 unsigned long distance = -1;
6631 if (INSN_ADDRESSES_SET_P ())
6633 distance = (total_code_bytes + insn_current_reference_address (insn));
6634 if (distance < total_code_bytes)
6640 if (!TARGET_LONG_CALLS
6641 && ((!sibcall && distance < 7600000) || distance < 240000))
6644 return (sibcall ? 28 : 24);
6648 if (!TARGET_LONG_CALLS
6649 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
6650 || distance < 240000))
6653 if (TARGET_LONG_ABS_CALL && !flag_pic)
6656 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6657 || (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
6669 length += length_fp_args (insn);
6675 return (length + 32);
6677 if (!TARGET_NO_SPACE_REGS)
6683 return (length + 32);
6688 /* INSN is a function call. It may have an unconditional jump
6691 CALL_DEST is the routine we are calling. */
/* Emits the assembler sequence for a normal or sibling call.  Handles:
   the short bl form; the 64-bit indirect plabel call; the long-call
   family (absolute, SOM symbol-difference PIC, GAS pc-relative PIC,
   and the inline $$dyncall-style plabel sequence); opportunistic use
   of the delay slot (moving a non-jump delay insn before the call and
   deleting it); and finally a jump in the delay slot, whose target is
   folded into the return address when within 14-bit reach.
   NOTE(review): many lines are elided in this listing; the visible
   tokens are preserved byte-identical.  */
6694 output_call (insn, call_dest, sibcall)
6699 int delay_insn_deleted = 0;
6700 int delay_slot_filled = 0;
6701 int seq_length = dbr_sequence_length ();
6704 xoperands[0] = call_dest;
6706 /* Handle the common case where we're sure that the branch will reach
6707 the beginning of the $CODE$ subspace. */
6708 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
6710 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6711 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
6717 /* ??? As far as I can tell, the HP linker doesn't support the
6718 long pc-relative sequence described in the 64-bit runtime
6719 architecture. So, we use a slightly longer indirect call. */
6720 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6722 xoperands[0] = p->internal_label;
6723 xoperands[1] = gen_label_rtx ();
6725 /* If this isn't a sibcall, we put the load of %r27 into the
6726 delay slot. We can't do this in a sibcall as we don't
6727 have a second call-clobbered scratch register available. */
6729 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6732 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6735 /* Now delete the delay insn. */
6736 PUT_CODE (NEXT_INSN (insn), NOTE);
6737 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6738 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6739 delay_insn_deleted = 1;
6742 output_asm_insn ("addil LT'%0,%%r27", xoperands);
6743 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
6744 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
6748 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6749 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
6750 output_asm_insn ("bve (%%r1)", xoperands);
6754 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
6755 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
6756 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6757 delay_slot_filled = 1;
6762 int indirect_call = 0;
6764 /* Emit a long call. There are several different sequences
6765 of increasing length and complexity. In most cases,
6766 they don't allow an instruction in the delay slot. */
6767 if (!(TARGET_LONG_ABS_CALL && !flag_pic)
6768 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6769 && !(TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
6773 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6775 && (!TARGET_PA_20 || indirect_call))
6777 /* A non-jump insn in the delay slot. By definition we can
6778 emit this insn before the call (and in fact before argument
6780 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6782 /* Now delete the delay insn. */
6783 PUT_CODE (NEXT_INSN (insn), NOTE);
6784 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6785 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6786 delay_insn_deleted = 1;
6789 if (TARGET_LONG_ABS_CALL && !flag_pic)
6791 /* This is the best sequence for making long calls in
6792 non-pic code. Unfortunately, GNU ld doesn't provide
6793 the stub needed for external calls, and GAS's support
6794 for this with the SOM linker is buggy. */
6795 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6797 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
6801 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
6804 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6806 output_asm_insn ("copy %%r31,%%r2", xoperands);
6807 delay_slot_filled = 1;
6812 if (TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6814 /* The HP assembler and linker can handle relocations
6815 for the difference of two symbols. GAS and the HP
6816 linker can't do this when one of the symbols is
6818 xoperands[1] = gen_label_rtx ();
6819 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6820 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6821 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6822 CODE_LABEL_NUMBER (xoperands[1]));
6823 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6825 else if (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL)
6827 /* GAS currently can't generate the relocations that
6828 are needed for the SOM linker under HP-UX using this
6829 sequence. The GNU linker doesn't generate the stubs
6830 that are needed for external calls on TARGET_ELF32
6831 with this sequence. For now, we have to use a
6832 longer plabel sequence when using GAS. */
6833 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6834 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
6836 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
6841 /* Emit a long plabel-based call sequence. This is
6842 essentially an inline implementation of $$dyncall.
6843 We don't actually try to call $$dyncall as this is
6844 as difficult as calling the function itself. */
6845 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6847 xoperands[0] = p->internal_label;
6848 xoperands[1] = gen_label_rtx ();
6850 /* Since the call is indirect, FP arguments in registers
6851 need to be copied to the general registers. Then, the
6852 argument relocation stub will copy them back. */
6854 copy_fp_args (insn);
6858 output_asm_insn ("addil LT'%0,%%r19", xoperands);
6859 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
6860 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
6864 output_asm_insn ("addil LR'%0-$global$,%%r27",
6866 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
6870 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
6871 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
6872 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
6873 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6875 if (!sibcall && !TARGET_PA_20)
6877 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
6878 if (TARGET_NO_SPACE_REGS)
6879 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
6881 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
6888 output_asm_insn ("bve (%%r1)", xoperands);
6893 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6894 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
6895 delay_slot_filled = 1;
6898 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6903 if (!TARGET_NO_SPACE_REGS)
6904 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
6909 if (TARGET_NO_SPACE_REGS)
6910 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
6912 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
6916 if (TARGET_NO_SPACE_REGS)
6917 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
6919 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
6922 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
6924 output_asm_insn ("copy %%r31,%%r2", xoperands);
6925 delay_slot_filled = 1;
6932 if (seq_length == 0 || (delay_insn_deleted && !delay_slot_filled))
6933 output_asm_insn ("nop", xoperands);
6935 /* We are done if there isn't a jump in the delay slot. */
6937 || delay_insn_deleted
6938 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6941 /* A sibcall should never have a branch in the delay slot. */
6945 /* This call has an unconditional jump in its delay slot. */
6946 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6948 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
6950 /* See if the return address can be adjusted. Use the containing
6951 sequence insn's address. */
6952 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6953 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6954 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6956 if (VAL_14_BITS_P (distance))
6958 xoperands[1] = gen_label_rtx ();
6959 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
6960 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6961 CODE_LABEL_NUMBER (xoperands[1]));
6964 /* ??? This branch may not reach its target. */
6965 output_asm_insn ("nop\n\tb,n %0", xoperands);
6968 /* ??? This branch may not reach its target. */
6969 output_asm_insn ("b,n %0", xoperands);
6971 /* Delete the jump. */
6972 PUT_CODE (NEXT_INSN (insn), NOTE);
6973 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6974 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6979 /* Return the attribute length of the indirect call instruction INSN.
6980 The length must match the code generated by output_indirect call.
6981 The returned length includes the delay slot. Currently, the delay
6982 slot of an indirect call sequence is not exposed and it is used by
6983 the sequence itself. */
/* NOTE(review): the concrete byte counts returned for each case are on
   lines elided from this listing; output_indirect_call below keys off
   values 8, 12 and 20.  */
6986 attr_length_indirect_call (insn)
6989 unsigned long distance = -1;
6991 if (INSN_ADDRESSES_SET_P ())
6993 distance = (total_code_bytes + insn_current_reference_address (insn));
6994 if (distance < total_code_bytes)
7001 if (TARGET_FAST_INDIRECT_CALLS
7002 || (!TARGET_PORTABLE_RUNTIME
7003 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7009 if (TARGET_PORTABLE_RUNTIME)
7012 /* Out of reach, can use ble. */
/* Emit the assembler sequence for an indirect call through $$dyncall
   (or directly through the plabel on 64-bit).  The sequence chosen is
   keyed off attr_length_indirect_call, which must stay in sync.
   NOTE(review): lines are elided in this listing (e.g. the 64-bit
   early-return and some else arms); tokens are kept byte-identical.  */
7017 output_indirect_call (insn, call_dest)
7025 xoperands[0] = call_dest;
7026 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7027 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7031 /* First the special case for kernels, level 0 systems, etc. */
7032 if (TARGET_FAST_INDIRECT_CALLS)
7033 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7035 /* Now the normal case -- we can reach $$dyncall directly or
7036 we're sure that we can get there via a long-branch stub.
7038 No need to check target flags as the length uniquely identifies
7039 the remaining cases. */
7040 if (attr_length_indirect_call (insn) == 8)
7041 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7043 /* Long millicode call, but we are not generating PIC or portable runtime
7045 if (attr_length_indirect_call (insn) == 12)
7046 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7048 /* Long millicode call for portable runtime. */
7049 if (attr_length_indirect_call (insn) == 20)
7050 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7052 /* We need a long PIC call to $$dyncall. */
7053 xoperands[0] = NULL_RTX;
7054 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7055 if (TARGET_SOM || !TARGET_GAS)
7057 xoperands[0] = gen_label_rtx ();
7058 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7059 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7060 CODE_LABEL_NUMBER (xoperands[0]));
7061 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7065 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7066 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7069 output_asm_insn ("blr %%r0,%%r2", xoperands);
7070 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7074 /* Return the total length of the save and restore instructions needed for
7075 the data linkage table pointer (i.e., the PIC register) across the call
7076 instruction INSN. No-return calls do not require a save and restore.
7077 In addition, we may be able to avoid the save and restore for calls
7078 within the same translation unit. */
/* NOTE(review): the return values (0 for no-return calls, otherwise a
   byte count) are on lines elided from this listing.  */
7081 attr_length_save_restore_dltp (insn)
7084 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7090 /* In HPUX 8.0's shared library scheme, special relocations are needed
7091 for function labels if they might be passed to a function
7092 in a shared library (because shared libraries don't live in code
7093 space), and special magic is needed to construct their address. */
/* Rewrites the SYMBOL_REF's name with a marker prefix (the prefix
   characters themselves are on elided lines — pa_strip_name_encoding
   removes '@' and '*') and re-registers the string with the GC
   allocator.  The temporary copy lives in alloca storage.  */
7096 hppa_encode_label (sym)
7099 const char *str = XSTR (sym, 0);
7100 int len = strlen (str) + 1;
7103 p = newstr = alloca (len + 1);
7107 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* TARGET_ENCODE_SECTION_INFO hook: on first encounter of a DECL that
   lives in text space, set SYMBOL_REF_FLAG on its symbol and, for
   function decls, apply the HPUX function-label encoding above.  */
7111 pa_encode_section_info (decl, rtl, first)
7116 if (first && TEXT_SPACE_P (decl))
7118 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7119 if (TREE_CODE (decl) == FUNCTION_DECL)
7120 hppa_encode_label (XEXP (rtl, 0));
/* This is sort of inverse to pa_encode_section_info: strip the
   encoding characters that may have been prepended to a symbol name.
   STR is the possibly-encoded name; the returned pointer aliases STR
   (no allocation, no modification of the input).

   At most one leading '@' (function-label marker) is skipped, then at
   most one leading '*' (no-user-prefix marker).  The listing this was
   recovered from had the function truncated (missing return type,
   parameter declaration, braces and the final return); this restores
   the complete definition.  */
const char *
pa_strip_name_encoding (str)
     const char *str;
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
/* Predicate: return nonzero when OP is a SYMBOL_REF whose name carries
   the function-label encoding (FUNCTION_NAME_P).  MODE is unused.  */
7136 function_label_operand (op, mode)
7138 enum machine_mode mode ATTRIBUTE_UNUSED;
7140 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7143 /* Returns 1 if OP is a function label involved in a simple addition
7144 with a constant. Used to keep certain patterns from matching
7145 during instruction combination. */
7147 is_function_label_plus_const (op)
7150 /* Strip off any CONST. */
7151 if (GET_CODE (op) == CONST)
/* (The unwrap of XEXP (op, 0) is on an elided line.)  */
7154 return (GET_CODE (op) == PLUS
7155 && function_label_operand (XEXP (op, 0), Pmode)
7156 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7159 /* Output assembly code for a thunk to FUNCTION. */
/* TARGET_ASM_OUTPUT_MI_THUNK: emit a this-adjusting thunk that adds
   DELTA to %r26 (the `this' argument register) and branches to
   FUNCTION.  Two major shapes: DELTA fitting in 14 bits (adjust in
   the branch delay slot) vs. a wide DELTA (addil/ldo pair).  In the
   32-bit PIC non-portable-runtime case the target address is fetched
   through the DLT and the $$dyncall-style plabel dance, and a local
   .word P'<target> plabel is emitted after the thunk body.
   VCALL_OFFSET is unused on this target.  NOTE(review): lines are
   elided in this listing; tokens are kept byte-identical.  */
7162 pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
7165 HOST_WIDE_INT delta;
7166 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED;
7169 const char *target_name = XSTR (XEXP (DECL_RTL (function), 0), 0);
7170 static unsigned int current_thunk_number;
7173 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7174 lab = (*targetm.strip_name_encoding) (label);
7175 target_name = (*targetm.strip_name_encoding) (target_name);
7176 /* FIXME: total_code_bytes is not handled correctly in files with
7178 pa_output_function_prologue (file, 0);
7179 if (VAL_14_BITS_P (delta))
7181 if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
7183 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7184 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7185 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7186 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7187 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7188 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7189 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7190 if (TARGET_NO_SPACE_REGS)
7191 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7194 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
7195 fprintf (file, "\tmtsp %%r1,%%sr0\n");
7196 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7198 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7201 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7203 target_name, delta);
7207 if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
7209 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7210 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n",
7212 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7213 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7214 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7215 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7216 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7217 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7218 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7219 if (TARGET_NO_SPACE_REGS)
7220 fprintf (file, "\tbe 0(%%sr4,%%r22)");
7223 fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
7224 fprintf (file, "\tmtsp %%r1,%%sr0\n");
7225 fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
7229 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7230 ",%%r26\n\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7231 "(%%r1),%%r26\n", delta, target_name, delta);
7234 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7235 if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
7238 fprintf (file, "\t.align 4\n");
7239 (*targetm.asm_out.internal_label) (file, "LTHN", current_thunk_number);
7240 fprintf (file, "\t.word P'%s\n", target_name);
7241 function_section (thunk_fndecl);
7243 current_thunk_number++;
7246 /* Only direct calls to static functions are allowed to be sibling (tail)
7249 This restriction is necessary because some linker generated stubs will
7250 store return pointers into rp' in some cases which might clobber a
7251 live value already in rp'.
7253 In a sibcall the current function and the target function share stack
7254 space. Thus if the path to the current function and the path to the
7255 target function save a value in rp', they save the value into the
7256 same stack slot, which has undesirable consequences.
7258 Because of the deferred binding nature of shared libraries any function
7259 with external scope could be in a different load module and thus require
7260 rp' to be saved when calling that function. So sibcall optimizations
7261 can only be safe for static functions.
7263 Note that GCC never needs return value relocations, so we don't have to
7264 worry about static calls with return value relocations (which require
7267 It is safe to perform a sibcall optimization when the target function
7268 will never return. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  DECL is the callee's FUNCTION_DECL
   (NULL for indirect calls); EXP is unused.  The target-flag guards
   selecting between the branches below are on lines elided from this
   listing.  */
7270 pa_function_ok_for_sibcall (decl, exp)
7272 tree exp ATTRIBUTE_UNUSED;
7274 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7275 single subspace mode and the call is not indirect. As far as I know,
7276 there is no operating system support for the multiple subspace mode.
7277 It might be possible to support indirect calls if we didn't use
7278 $$dyncall (see the indirect sequence generated in output_call). */
7280 return (decl != NULL_TREE);
7282 /* Sibcalls are not ok because the arg pointer register is not a fixed
7283 register. This prevents the sibcall optimization from occurring. In
7284 addition, there are problems with stub placement using GNU ld. This
7285 is because a normal sibcall branch uses a 17-bit relocation while
7286 a regular call branch uses a 22-bit relocation. As a result, more
7287 care needs to be taken in the placement of long-branch stubs. */
7292 && !TARGET_PORTABLE_RUNTIME
7293 && !TREE_PUBLIC (decl));
7296 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7297 use in fmpyadd instructions. */
/* Operand layout: 0 = multiply result, 1/2 = multiply inputs,
   3 = add result, 4/5 = add inputs.  Each guard below rejects a
   combination the fused instruction cannot encode; the early
   `return 0;' lines are elided from this listing.  */
7299 fmpyaddoperands (operands)
7302 enum machine_mode mode = GET_MODE (operands[0]);
7304 /* Must be a floating point mode. */
7305 if (mode != SFmode && mode != DFmode)
7308 /* All modes must be the same. */
7309 if (! (mode == GET_MODE (operands[1])
7310 && mode == GET_MODE (operands[2])
7311 && mode == GET_MODE (operands[3])
7312 && mode == GET_MODE (operands[4])
7313 && mode == GET_MODE (operands[5])))
7316 /* All operands must be registers. */
7317 if (! (GET_CODE (operands[1]) == REG
7318 && GET_CODE (operands[2]) == REG
7319 && GET_CODE (operands[3]) == REG
7320 && GET_CODE (operands[4]) == REG
7321 && GET_CODE (operands[5]) == REG))
7324 /* Only 2 real operands to the addition. One of the input operands must
7325 be the same as the output operand. */
7326 if (! rtx_equal_p (operands[3], operands[4])
7327 && ! rtx_equal_p (operands[3], operands[5]))
7330 /* Inout operand of add can not conflict with any operands from multiply. */
7331 if (rtx_equal_p (operands[3], operands[0])
7332 || rtx_equal_p (operands[3], operands[1])
7333 || rtx_equal_p (operands[3], operands[2]))
7336 /* multiply can not feed into addition operands. */
7337 if (rtx_equal_p (operands[4], operands[0])
7338 || rtx_equal_p (operands[5], operands[0]))
7341 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7343 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7344 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7345 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7346 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7347 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7348 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7351 /* Passed. Operands are suitable for fmpyadd. */
7355 #if !defined(USE_COLLECT2)
/* TARGET_ASM_CONSTRUCTOR hook: apply the HPUX label encoding to SYMBOL
   if not already encoded, then defer to the generic ctor-section /
   named-section / stabs emitters depending on configuration.  */
7357 pa_asm_out_constructor (symbol, priority)
7361 if (!function_label_operand (symbol, VOIDmode))
7362 hppa_encode_label (symbol);
7364 #ifdef CTORS_SECTION_ASM_OP
7365 default_ctor_section_asm_out_constructor (symbol, priority);
7367 # ifdef TARGET_ASM_NAMED_SECTION
7368 default_named_section_asm_out_constructor (symbol, priority);
7370 default_stabs_asm_out_constructor (symbol, priority);
/* TARGET_ASM_DESTRUCTOR hook: mirror of pa_asm_out_constructor for the
   dtor list — encode SYMBOL if needed, then dispatch to the generic
   dtor-section / named-section / stabs emitters.  */
7376 pa_asm_out_destructor (symbol, priority)
7380 if (!function_label_operand (symbol, VOIDmode))
7381 hppa_encode_label (symbol);
7383 #ifdef DTORS_SECTION_ASM_OP
7384 default_dtor_section_asm_out_destructor (symbol, priority);
7386 # ifdef TARGET_ASM_NAMED_SECTION
7387 default_named_section_asm_out_destructor (symbol, priority);
7389 default_stabs_asm_out_destructor (symbol, priority);
7395 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7396 use in fmpysub instructions. */
/* Parallel of fmpyaddoperands above, tightened for subtraction: since
   subtraction is non-commutative, operands[4] must equal operands[3].
   The early `return 0;' lines are elided from this listing.  */
7398 fmpysuboperands (operands)
7401 enum machine_mode mode = GET_MODE (operands[0]);
7403 /* Must be a floating point mode. */
7404 if (mode != SFmode && mode != DFmode)
7407 /* All modes must be the same. */
7408 if (! (mode == GET_MODE (operands[1])
7409 && mode == GET_MODE (operands[2])
7410 && mode == GET_MODE (operands[3])
7411 && mode == GET_MODE (operands[4])
7412 && mode == GET_MODE (operands[5])))
7415 /* All operands must be registers. */
7416 if (! (GET_CODE (operands[1]) == REG
7417 && GET_CODE (operands[2]) == REG
7418 && GET_CODE (operands[3]) == REG
7419 && GET_CODE (operands[4]) == REG
7420 && GET_CODE (operands[5]) == REG))
7423 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
7424 operation, so operands[4] must be the same as operand[3]. */
7425 if (! rtx_equal_p (operands[3], operands[4]))
7428 /* multiply can not feed into subtraction. */
7429 if (rtx_equal_p (operands[5], operands[0]))
7432 /* Inout operand of sub can not conflict with any operands from multiply. */
7433 if (rtx_equal_p (operands[3], operands[0])
7434 || rtx_equal_p (operands[3], operands[1])
7435 || rtx_equal_p (operands[3], operands[2]))
7438 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7440 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7441 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7442 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7443 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7444 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7445 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7448 /* Passed. Operands are suitable for fmpysub. */
/* Predicate: return nonzero if OP is a PLUS, XOR, or IOR rtx.  MODE is
   unused.  NOTE(review): the `rtx op;' parameter declaration and braces
   appear elided in this extraction.  */
7453 plus_xor_ior_operator (op, mode)
7455      enum machine_mode mode ATTRIBUTE_UNUSED;
7457   return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
7458 	  || GET_CODE (op) == IOR);
7461 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
7462    constants for shadd instructions. */
/* shadd scales an index by shifting 1, 2, or 3 bits, hence the only
   valid multipliers are 2, 4, and 8.  NOTE(review): the `return 1;` and
   `return 0;` lines appear elided in this extraction.  */
7464 shadd_constant_p (val)
7467   if (val == 2 || val == 4 || val == 8)
7473 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8.  These are
7474    the valid constant for shadd instructions. */
/* Thin predicate wrapper around shadd_constant_p for use in insn
   operand predicates.  MODE is unused.  */
7476 shadd_operand (op, mode)
7478      enum machine_mode mode ATTRIBUTE_UNUSED;
7480   return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
7483 /* Return 1 if OP is valid as a base register in a reg + reg address. */
7486 basereg_operand (op, mode)
7488      enum machine_mode mode;
7490   /* cse will create some unscaled indexed addresses, however; it
7491      generally isn't a win on the PA, so avoid creating unscaled
7492      indexed addresses until after cse is finished.  */
7493   if (!cse_not_expected)
7496   /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7497      we don't have to worry about the braindamaged implicit space
7498      register selection from the basereg.  */
7499   if (TARGET_NO_SPACE_REGS)
7500     return (GET_CODE (op) == REG);
7502   /* While it's always safe to index off the frame pointer, it's not
7503      always profitable, particularly when the frame pointer is being
7505   if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
7508   /* Finally, OP must be a hard register that register_operand accepts.
7509      NOTE(review): a REGNO-based condition between these two lines
7510      appears elided in this extraction -- verify against full source.  */
7508   return (GET_CODE (op) == REG
7510 	  && register_operand (op, mode));
7513 /* Return 1 if this operand is anything other than a hard register. */
/* True for pseudos, MEMs, constants, etc.; false only for a REG whose
   number is below FIRST_PSEUDO_REGISTER.  MODE is unused.  */
7516 non_hard_reg_operand (op, mode)
7518      enum machine_mode mode ATTRIBUTE_UNUSED;
7520   return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7523 /* Return 1 if INSN branches forward.  Should be using insn_addresses
7524    to avoid walking through all the insns... */
/* Walks forward from INSN; if the walk reaches JUMP_LABEL (insn) the
   branch is forward.  NOTE(review): the loop header/terminator lines
   appear elided in this extraction.  */
7526 forward_branch_p (insn)
7529   rtx label = JUMP_LABEL (insn);
7536       insn = NEXT_INSN (insn);
7539   return (insn == label);
7542 /* Return 1 if OP is an equality comparison, else return 0. */
/* Accepts only EQ and NE rtx codes.  MODE is unused.  */
7544 eq_neq_comparison_operator (op, mode)
7546      enum machine_mode mode ATTRIBUTE_UNUSED;
7548   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7551 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
/* movb supports only the EQ/NE/LT/GE conditions; the other comparison
   codes cannot be encoded.  MODE is unused.  */
7553 movb_comparison_operator (op, mode)
7555      enum machine_mode mode ATTRIBUTE_UNUSED;
7557   return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7558 	  || GET_CODE (op) == LT || GET_CODE (op) == GE);
7561 /* Return 1 if INSN is in the delay slot of a call instruction. */
/* A filled delay slot is represented as a SEQUENCE whose element 1 is
   the delay insn; we look two insns back for the SEQUENCE that might
   contain INSN.  */
7563 jump_in_call_delay (insn)
7567   if (GET_CODE (insn) != JUMP_INSN)
7570   if (PREV_INSN (insn)
7571       && PREV_INSN (PREV_INSN (insn))
7572       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
7574       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
7576       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
7577 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
7584 /* Output an unconditional move and branch insn. */
/* Emits assembler text for a combined copy+branch.  When the combined
   form fits (short branch), a single mov/ldi-with-branch "movb,tr" or
   "ldib,tr" is used; otherwise the copy is placed in (or around) the
   branch's delay slot.  NOTE(review): the LENGTH-based test selecting
   the winning case appears elided in this extraction.  */
7587 output_parallel_movb (operands, length)
7591   /* These are the cases in which we win.  */
7593     return "mov%I1b,tr %1,%0,%2";
7595   /* None of these cases wins, but they don't lose either.  */
7596   if (dbr_sequence_length () == 0)
7598       /* Nothing in the delay slot, fake it by putting the combined
7599 	 insn (the copy or add) in the delay slot of a bl.  */
7600       if (GET_CODE (operands[1]) == CONST_INT)
7601 	return "b %2\n\tldi %1,%0";
7603 	return "b %2\n\tcopy %1,%0";
7607       /* Something in the delay slot, but we've got a long branch.  */
7608       if (GET_CODE (operands[1]) == CONST_INT)
7609 	return "ldi %1,%0\n\tb %2";
7611 	return "copy %1,%0\n\tb %2";
7615 /* Output an unconditional add and branch insn. */
/* Like output_parallel_movb, but the combined operation is an add.
   The shared input/output register is canonicalized into operands[0]
   first.  NOTE(review): the LENGTH-based test selecting the winning
   case appears elided in this extraction.  */
7618 output_parallel_addb (operands, length)
7622   /* To make life easy we want operand0 to be the shared input/output
7623      operand and operand1 to be the readonly operand.  */
7624   if (operands[0] == operands[1])
7625     operands[1] = operands[2];
7627   /* These are the cases in which we win.  */
7629     return "add%I1b,tr %1,%0,%3";
7631   /* None of these cases win, but they don't lose either.  */
7632   if (dbr_sequence_length () == 0)
7634       /* Nothing in the delay slot, fake it by putting the combined
7635 	 insn (the copy or add) in the delay slot of a bl.  */
7636       return "b %3\n\tadd%I1 %1,%0,%0";
7640       /* Something in the delay slot, but we've got a long branch.  */
7641       return "add%I1 %1,%0,%0\n\tb %3";
7645 /* Return nonzero if INSN (a jump insn) immediately follows a call
7646    to a named function.  This is used to avoid filling the delay slot
7647    of the jump since it can usually be eliminated by modifying RP in
7648    the delay slot of the call.  */
7651 following_call (insn)
7654   if (! TARGET_JUMP_IN_DELAY)
7657   /* Find the previous real insn, skipping NOTEs.  */
7658   insn = PREV_INSN (insn);
7659   while (insn && GET_CODE (insn) == NOTE)
7660     insn = PREV_INSN (insn);
7662   /* Check for CALL_INSNs and millicode calls.  A millicode call is a
7663      plain INSN (not a SEQUENCE/USE/CLOBBER) whose attr type is MILLI.  */
7664       && ((GET_CODE (insn) == CALL_INSN
7665 	   && get_attr_type (insn) != TYPE_DYNCALL)
7666 	  || (GET_CODE (insn) == INSN
7667 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
7668 	      && GET_CODE (PATTERN (insn)) != USE
7669 	      && GET_CODE (PATTERN (insn)) != CLOBBER
7670 	      && get_attr_type (insn) == TYPE_MILLI)))
7676 /* We use this hook to perform a PA specific optimization which is difficult
7677 to do in earlier passes.
7679 We want the delay slots of branches within jump tables to be filled.
7680 None of the compiler passes at the moment even has the notion that a
7681 PA jump table doesn't contain addresses, but instead contains actual
7684 Because we actually jump into the table, the addresses of each entry
7685 must stay constant in relation to the beginning of the table (which
7686 itself must stay constant relative to the instruction to jump into
7687 it). I don't believe we can guarantee earlier passes of the compiler
7688 will adhere to those rules.
7690 So, late in the compilation process we find all the jump tables, and
7691 expand them into real code -- eg each entry in the jump table vector
7692 will get an appropriate label followed by a jump to the final target.
7694 Reorg and the final jump pass can then optimize these branches and
7695 fill their delay slots. We end up with smaller, more efficient code.
7697 The jump instructions within the table are special; we must be able
7698 to identify them during assembly output (if the jumps don't get filled
7699 we need to emit a nop rather than nullifying the delay slot)). We
7700 identify jumps in switch tables by marking the SET with DImode.
7702 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
7703 insns. This serves two purposes, first it prevents jump.c from
7704 noticing that the last N entries in the table jump to the instruction
7705 immediately after the table and deleting the jumps. Second, those
7706 insns mark where we should emit .begin_brtab and .end_brtab directives
7707 when using GAS (allows for better link time optimizations). */
7714   remove_useless_addtr_insns (1);
7716   if (pa_cpu < PROCESSOR_8000)
7717     pa_combine_instructions ();
7720   /* This is fairly cheap, so always run it if optimizing.  */
7721   if (optimize > 0 && !TARGET_BIG_SWITCH)
7723       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
7724       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7726 	  rtx pattern, tmp, location;
7727 	  unsigned int length, i;
7729 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
7730 	  if (GET_CODE (insn) != JUMP_INSN
7731 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7732 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7735 	  /* Emit marker for the beginning of the branch table.  */
7736 	  emit_insn_before (gen_begin_brtab (), insn);
7738 	  pattern = PATTERN (insn);
7739 	  location = PREV_INSN (insn);
7740 	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
7742 	  for (i = 0; i < length; i++)
7744 	      /* Emit a label before each jump to keep jump.c from
7745 		 removing this code.  */
7746 	      tmp = gen_label_rtx ();
7747 	      LABEL_NUSES (tmp) = 1;
7748 	      emit_label_after (tmp, location);
7749 	      location = NEXT_INSN (location);
7751 	      if (GET_CODE (pattern) == ADDR_VEC)
7753 		  /* Emit the jump itself.  */
7754 		  tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
7755 		  tmp = emit_jump_insn_after (tmp, location);
7756 		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
7757 		  /* It is easy to rely on the branch table markers
7758 		     during assembly output to trigger the correct code
7759 		     for a switch table jump with an unfilled delay slot,
7761 		     However, that requires state and assumes that we look
7764 		     We can't make such assumptions when computing the length
7765 		     of instructions.  Ugh.  We could walk the insn chain to
7766 		     determine if this instruction is in a branch table, but
7767 		     that can get rather expensive, particularly during the
7768 		     branch shortening phase of the compiler.
7770 		     So instead we mark this jump as being special.  This is
7771 		     far from ideal and knows that no code after this will
7772 		     muck around with the mode of the JUMP_INSN itself.  */
7773 		  PUT_MODE (tmp, SImode);
7774 		  LABEL_NUSES (JUMP_LABEL (tmp))++;
7775 		  location = NEXT_INSN (location);
7779 		  /* Emit the jump itself.  */
7780 		  tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
7781 		  tmp = emit_jump_insn_after (tmp, location);
7782 		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
7783 		  /* It is easy to rely on the branch table markers
7784 		     during assembly output to trigger the correct code
7785 		     for a switch table jump with an unfilled delay slot,
7787 		     However, that requires state and assumes that we look
7790 		     We can't make such assumptions when computing the length
7791 		     of instructions.  Ugh.  We could walk the insn chain to
7792 		     determine if this instruction is in a branch table, but
7793 		     that can get rather expensive, particularly during the
7794 		     branch shortening phase of the compiler.
7796 		     So instead we mark this jump as being special.  This is
7797 		     far from ideal and knows that no code after this will
7798 		     muck around with the mode of the JUMP_INSN itself.  */
7799 		  PUT_MODE (tmp, SImode);
7800 		  LABEL_NUSES (JUMP_LABEL (tmp))++;
7801 		  location = NEXT_INSN (location);
7804 	      /* Emit a BARRIER after the jump.  */
7805 	      emit_barrier_after (location);
7806 	      location = NEXT_INSN (location);
7809 	  /* Emit marker for the end of the branch table.  */
7810 	  emit_insn_before (gen_end_brtab (), location);
7811 	  location = NEXT_INSN (location);
7812 	  emit_barrier_after (location);
7814 	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
7820       /* Still need an end_brtab insn.  */
7821       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7823 	  /* Find an ADDR_VEC insn.  */
7824 	  if (GET_CODE (insn) != JUMP_INSN
7825 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7826 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7829 	  /* Now generate markers for the beginning and end of the
7830 	     branch table.  */
7831 	  emit_insn_before (gen_begin_brtab (), insn);
7832 	  emit_insn_after (gen_end_brtab (), insn);
7837 /* The PA has a number of odd instructions which can perform multiple
7838 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
7839 it may be profitable to combine two instructions into one instruction
7840 with two outputs. It's not profitable PA2.0 machines because the
7841 two outputs would take two slots in the reorder buffers.
7843 This routine finds instructions which can be combined and combines
7844 them. We only support some of the potential combinations, and we
7845 only try common ways to find suitable instructions.
7847 * addb can add two registers or a register and a small integer
7848 and jump to a nearby (+-8k) location. Normally the jump to the
7849 nearby location is conditional on the result of the add, but by
7850 using the "true" condition we can make the jump unconditional.
7851 Thus addb can perform two independent operations in one insn.
7853 * movb is similar to addb in that it can perform a reg->reg
7854 or small immediate->reg copy and jump to a nearby (+-8k location).
7856 * fmpyadd and fmpysub can perform a FP multiply and either an
7857 FP add or FP sub if the operands of the multiply and add/sub are
7858 independent (there are other minor restrictions). Note both
7859 the fmpy and fadd/fsub can in theory move to better spots according
7860 to data dependencies, but for now we require the fmpy stay at a
7863 * Many of the memory operations can perform pre & post updates
7864 of index registers. GCC's pre/post increment/decrement addressing
7865 is far too simple to take advantage of all the possibilities. This
7866 pass may not be suitable since those insns may not be independent.
7868 * comclr can compare two ints or an int and a register, nullify
7869 the following instruction and zero some other register. This
7870 is more difficult to use as it's harder to find an insn which
7871 will generate a comclr than finding something like an unconditional
7872 branch. (conditional moves & long branches create comclr insns).
7874 * Most arithmetic operations can conditionally skip the next
7875 instruction. They can be viewed as "perform this operation
7876 and conditionally jump to this nearby location" (where nearby
7877 is an insns away). These are difficult to use due to the
7878 branch length restrictions. */
/* Scan the insn stream pairing "anchor" insns (fmpy, faddsub, or a
   backwards unconditional branch) with "floating" insns (faddsub, fmpy,
   or addmove) into a single two-set PARALLEL when pa_can_combine_p
   says the pairing is legal.  See the long comment above for the
   motivation.  */
7881 pa_combine_instructions ()
7885   /* This can get expensive since the basic algorithm is on the
7886      order of O(n^2) (or worse).  Only do it for -O2 or higher
7887      levels of optimization.  */
7891   /* Walk down the list of insns looking for "anchor" insns which
7892      may be combined with "floating" insns.  As the name implies,
7893      "anchor" instructions don't move, while "floating" insns may
7895   new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7896   new = make_insn_raw (new);
7898   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7900       enum attr_pa_combine_type anchor_attr;
7901       enum attr_pa_combine_type floater_attr;
7903       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7904 	 Also ignore any special USE insns.  */
7905       if ((GET_CODE (anchor) != INSN
7906 	  && GET_CODE (anchor) != JUMP_INSN
7907 	  && GET_CODE (anchor) != CALL_INSN)
7908 	  || GET_CODE (PATTERN (anchor)) == USE
7909 	  || GET_CODE (PATTERN (anchor)) == CLOBBER
7910 	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
7911 	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
7914       anchor_attr = get_attr_pa_combine_type (anchor);
7915       /* See if anchor is an insn suitable for combination.  */
7916       if (anchor_attr == PA_COMBINE_TYPE_FMPY
7917 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
7918 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7919 	      && ! forward_branch_p (anchor)))
7923 	  for (floater = PREV_INSN (anchor);
7925 	       floater = PREV_INSN (floater))
7927 	      if (GET_CODE (floater) == NOTE
7928 		  || (GET_CODE (floater) == INSN
7929 		      && (GET_CODE (PATTERN (floater)) == USE
7930 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
7933 	      /* Anything except a regular INSN will stop our search.  */
7934 	      if (GET_CODE (floater) != INSN
7935 		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
7936 		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7942 	      /* See if FLOATER is suitable for combination with the
7944 	      floater_attr = get_attr_pa_combine_type (floater);
7945 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7946 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7947 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7948 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
7950 		  /* If ANCHOR and FLOATER can be combined, then we're
7951 		     done with this pass.  */
7952 		  if (pa_can_combine_p (new, anchor, floater, 0,
7953 					SET_DEST (PATTERN (floater)),
7954 					XEXP (SET_SRC (PATTERN (floater)), 0),
7955 					XEXP (SET_SRC (PATTERN (floater)), 1)))
7959 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7960 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
7962 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
7964 		      if (pa_can_combine_p (new, anchor, floater, 0,
7965 					    SET_DEST (PATTERN (floater)),
7966 					    XEXP (SET_SRC (PATTERN (floater)), 0),
7967 					    XEXP (SET_SRC (PATTERN (floater)), 1)))
7972 		      if (pa_can_combine_p (new, anchor, floater, 0,
7973 					    SET_DEST (PATTERN (floater)),
7974 					    SET_SRC (PATTERN (floater)),
7975 					    SET_SRC (PATTERN (floater))))
7981 	  /* If we didn't find anything on the backwards scan try forwards.  */
7983 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
7984 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
7986 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
7988 		  if (GET_CODE (floater) == NOTE
7989 		      || (GET_CODE (floater) == INSN
7990 			  && (GET_CODE (PATTERN (floater)) == USE
7991 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
7995 		  /* Anything except a regular INSN will stop our search.  */
7996 		  if (GET_CODE (floater) != INSN
7997 		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
7998 		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8004 		  /* See if FLOATER is suitable for combination with the
8006 		  floater_attr = get_attr_pa_combine_type (floater);
8007 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8008 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8009 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8010 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
8012 		      /* If ANCHOR and FLOATER can be combined, then we're
8013 			 done with this pass.  */
8014 		      if (pa_can_combine_p (new, anchor, floater, 1,
8015 					    SET_DEST (PATTERN (floater)),
8016 					    XEXP (SET_SRC (PATTERN (floater)),
8018 					    XEXP (SET_SRC (PATTERN (floater)),
8025 	  /* FLOATER will be nonzero if we found a suitable floating
8026 	     insn for combination with ANCHOR.  */
8028 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8029 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
8031 	      /* Emit the new instruction and delete the old anchor.  */
8032 	      emit_insn_before (gen_rtx_PARALLEL
8034 				 gen_rtvec (2, PATTERN (anchor),
8035 					    PATTERN (floater))),
8038 	      PUT_CODE (anchor, NOTE);
8039 	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8040 	      NOTE_SOURCE_FILE (anchor) = 0;
8042 	      /* Emit a special USE insn for FLOATER, then delete
8043 		 the floating insn.  */
8044 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8045 	      delete_insn (floater);
8050 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8053 	      /* Emit the new_jump instruction and delete the old anchor.  */
8055 		= emit_jump_insn_before (gen_rtx_PARALLEL
8057 					  gen_rtvec (2, PATTERN (anchor),
8058 						     PATTERN (floater))),
8061 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8062 	      PUT_CODE (anchor, NOTE);
8063 	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8064 	      NOTE_SOURCE_FILE (anchor) = 0;
8066 	      /* Emit a special USE insn for FLOATER, then delete
8067 		 the floating insn.  */
8068 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8069 	      delete_insn (floater);
/* Return nonzero if ANCHOR and FLOATER may be combined into the scratch
   two-set PARALLEL insn NEW.  REVERSED says whether FLOATER follows
   ANCHOR; DEST is FLOATER's output, SRC1/SRC2 its inputs.  The combined
   pattern must recognize and satisfy its constraints, and the operands
   must not be set/used between the two insns.  */
8077 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
8078      rtx new, anchor, floater;
8080      rtx dest, src1, src2;
8082   int insn_code_number;
8085   /* Create a PARALLEL with the patterns of ANCHOR and
8086      FLOATER, try to recognize it, then test constraints
8087      for the resulting pattern.
8089      If the pattern doesn't match or the constraints
8090      aren't met keep searching for a suitable floater
8092   XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8093   XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8094   INSN_CODE (new) = -1;
8095   insn_code_number = recog_memoized (new);
8096   if (insn_code_number < 0
8097       || (extract_insn (new), ! constrain_operands (1)))
8111   /* There's up to three operands to consider.  One
8112      output and two inputs.
8114      The output must not be used between FLOATER & ANCHOR
8115      exclusive.  The inputs must not be set between
8116      FLOATER and ANCHOR exclusive.  */
8118   if (reg_used_between_p (dest, start, end))
8121   if (reg_set_between_p (src1, start, end))
8124   if (reg_set_between_p (src2, start, end))
8127   /* If we get here, then everything is good.  */
8131 /* Return nonzero if references for INSN are delayed.
8133 Millicode insns are actually function calls with some special
8134 constraints on arguments and register usage.
8136 Millicode calls always expect their arguments in the integer argument
8137 registers, and always return their result in %r29 (ret1). They
8138 are expected to clobber their arguments, %r1, %r29, and the return
8139 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8141 This function tells reorg that the references to arguments and
8142 millicode calls do not appear to happen until after the millicode call.
8143 This allows reorg to put insns which set the argument registers into the
8144 delay slot of the millicode call -- thus they act more like traditional
8147 Note we can not consider side effects of the insn to be delayed because
8148 the branch and link insn will clobber the return pointer. If we happened
8149 to use the return pointer in the delay slot of the call, then we lose.
8151 get_attr_type will try to recognize the given insn, so make sure to
8152 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
/* Predicate used by reorg: true only for millicode-call INSNs (plain
   INSN, not SEQUENCE/USE/CLOBBER, with attr type MILLI) -- see the
   comment above for why their argument references are treated as
   delayed.  */
8155 insn_refs_are_delayed (insn)
8158   return ((GET_CODE (insn) == INSN
8159 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
8160 	   && GET_CODE (PATTERN (insn)) != USE
8161 	   && GET_CODE (PATTERN (insn)) != CLOBBER
8162 	   && get_attr_type (insn) == TYPE_MILLI));
8165 /* On the HP-PA the value is found in register(s) 28(-29), unless
8166 the mode is SF or DF. Then the value is returned in fr4 (32).
8168 This must perform the same promotions as PROMOTE_MODE, else
8169 PROMOTE_FUNCTION_RETURN will not work correctly.
8171 Small structures must be returned in a PARALLEL on PA64 in order
8172 to match the HP Compiler ABI. */
/* Implements the FUNCTION_VALUE target contract for the PA; FUNC is
   unused.  Returns the rtx (REG or PARALLEL) holding a function's
   return value of type VALTYPE.  */
8175 function_value (valtype, func)
8177      tree func ATTRIBUTE_UNUSED;
8179   enum machine_mode valmode;
8181   /* Aggregates with a size less than or equal to 128 bits are returned
8182      in GR 28(-29).  They are left justified.  The pad bits are undefined.
8183      Larger aggregates are returned in memory.  */
8184   if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8188       int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8190       for (i = 0; i < ub; i++)
8192 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8193 				      gen_rtx_REG (DImode, 28 + i),
8198       return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8201   /* Promote small integers and pointers to word_mode, matching
8202      PROMOTE_MODE as required by the comment above.  */
8201   if ((INTEGRAL_TYPE_P (valtype)
8202        && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8203       || POINTER_TYPE_P (valtype))
8204     valmode = word_mode;
8206     valmode = TYPE_MODE (valtype);
8208   if (TREE_CODE (valtype) == REAL_TYPE
8209       && TYPE_MODE (valtype) != TFmode
8210       && !TARGET_SOFT_FLOAT)
8211     return gen_rtx_REG (valmode, 32);
8213   return gen_rtx_REG (valmode, 28);
8216 /* Return the location of a parameter that is passed in a register or NULL
8217 if the parameter has any component that is passed in memory.
8219 This is new code and will be pushed to into the net sources after
8222 ??? We might want to restructure this so that it looks more like other
/* Implements the FUNCTION_ARG target contract for the PA: return the
   rtx (REG or PARALLEL) for a parameter passed in registers, or 0 when
   any part of it goes to the stack.  CUM tracks argument words already
   used; NAMED is unused.  */
8225 function_arg (cum, mode, type, named)
8226      CUMULATIVE_ARGS *cum;
8227      enum machine_mode mode;
8229      int named ATTRIBUTE_UNUSED;
8231   int max_arg_words = (TARGET_64BIT ? 8 : 4);
8238   if (mode == VOIDmode)
8241   arg_size = FUNCTION_ARG_SIZE (mode, type);
8243   /* If this arg would be passed partially or totally on the stack, then
8244      this routine should return zero.  FUNCTION_ARG_PARTIAL_NREGS will
8245      handle arguments which are split between regs and stack slots if
8246      the ABI mandates split arguments.  */
8249       /* The 32-bit ABI does not split arguments.  */
8250       if (cum->words + arg_size > max_arg_words)
8256       alignment = cum->words & 1;
8257       if (cum->words + alignment >= max_arg_words)
8261   /* The 32bit ABIs and the 64bit ABIs are rather different,
8262      particularly in their handling of FP registers.  We might
8263      be able to cleverly share code between them, but I'm not
8264      going to bother in the hope that splitting them up results
8265      in code that is more easily understood.  */
8269       /* Advance the base registers to their current locations.
8271 	 Remember, gprs grow towards smaller register numbers while
8272 	 fprs grow to higher register numbers.  Also remember that
8273 	 although FP regs are 32-bit addressable, we pretend that
8274 	 the registers are 64-bits wide.  */
8275       gpr_reg_base = 26 - cum->words;
8276       fpr_reg_base = 32 + cum->words;
8278       /* Arguments wider than one word and small aggregates need special
8282 	  || (type && AGGREGATE_TYPE_P (type)))
8284 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
8285 	     and aggregates including complex numbers are aligned on
8286 	     128-bit boundaries.  The first eight 64-bit argument slots
8287 	     are associated one-to-one, with general registers r26
8288 	     through r19, and also with floating-point registers fr4
8289 	     through fr11.  Arguments larger than one word are always
8290 	     passed in general registers.
8292 	     Using a PARALLEL with a word mode register results in left
8293 	     justified data on a big-endian target.  */
8296 	  int i, offset = 0, ub = arg_size;
8298 	  /* Align the base register.  */
8299 	  gpr_reg_base -= alignment;
8301 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
8302 	  for (i = 0; i < ub; i++)
8304 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8305 					  gen_rtx_REG (DImode, gpr_reg_base),
8311 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8316       /* If the argument is larger than a word, then we know precisely
8317 	 which registers we must use.  */
8331 	  /* Structures 5 to 8 bytes in size are passed in the general
8332 	     registers in the same manner as other non floating-point
8333 	     objects.  The data is right-justified and zero-extended
8336 	     This is magic.  Normally, using a PARALLEL results in left
8337 	     justified data on a big-endian target.  However, using a
8338 	     single double-word register provides the required right
8339 	     justification for 5 to 8 byte structures.  This has nothing
8340 	     to do with the direction of padding specified for the argument.
8341 	     It has to do with how the data is widened and shifted into
8342 	     and from the register.
8344 	     Aside from adding load_multiple and store_multiple patterns,
8345 	     this is the only way that I have found to obtain right
8346 	     justification of BLKmode data when it has a size greater
8347 	     than one word.  Splitting the operation into two SImode loads
8348 	     or returning a DImode REG results in left justified data.  */
8349 	  if (mode == BLKmode)
8351 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8352 					   gen_rtx_REG (DImode, gpr_reg_base),
8354 	      return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8359 	  /* We have a single word (32 bits).  A simple computation
8360 	     will get us the register #s we need.  */
8361 	  gpr_reg_base = 26 - cum->words;
8362 	  fpr_reg_base = 32 + 2 * cum->words;
8366   /* Determine if the argument needs to be passed in both general and
8367      floating point registers.  */
8368   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8369        /* If we are doing soft-float with portable runtime, then there
8370 	  is no need to worry about FP regs.  */
8371        && !TARGET_SOFT_FLOAT
8372        /* The parameter must be some kind of float, else we can just
8373 	  pass it in integer registers.  */
8374        && FLOAT_MODE_P (mode)
8375        /* The target function must not have a prototype.  */
8376        && cum->nargs_prototype <= 0
8377        /* libcalls do not need to pass items in both FP and general
8379        && type != NULL_TREE
8380        /* All this hair applies to "outgoing" args only.  This includes
8381 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
8383       /* Also pass outgoing floating arguments in both registers in indirect
8384 	 calls with the 32 bit ABI and the HP assembler since there is no
8385 	 way to specify argument locations in static functions.  */
8390 	  && FLOAT_MODE_P (mode)))
8396 		 gen_rtx_EXPR_LIST (VOIDmode,
8397 				    gen_rtx_REG (mode, fpr_reg_base),
8399 		 gen_rtx_EXPR_LIST (VOIDmode,
8400 				    gen_rtx_REG (mode, gpr_reg_base),
8405       /* See if we should pass this parameter in a general register.  */
8406       if (TARGET_SOFT_FLOAT
8407 	  /* Indirect calls in the normal 32bit ABI require all arguments
8408 	     to be passed in general registers.  */
8409 	  || (!TARGET_PORTABLE_RUNTIME
8413 	  /* If the parameter is not a floating point parameter, then
8414 	     it belongs in GPRs.  */
8415 	  || !FLOAT_MODE_P (mode))
8416 	retval = gen_rtx_REG (mode, gpr_reg_base);
8418 	retval = gen_rtx_REG (mode, fpr_reg_base);
8424 /* If this arg would be passed totally in registers or totally on the stack,
8425    then this routine should return zero.  It is currently called only for
8426    the 64-bit target.  */
/* Returns the number of argument words passed in registers when the
   argument is split between the last register slots and the stack
   (64-bit ABI only); NAMED is unused.  */
8428 function_arg_partial_nregs (cum, mode, type, named)
8429      CUMULATIVE_ARGS *cum;
8430      enum machine_mode mode;
8432      int named ATTRIBUTE_UNUSED;
8434   unsigned int max_arg_words = 8;
8435   unsigned int offset = 0;
8437   /* Multi-word args start on an even word boundary; account for the
8438      padding slot.  */
8437   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
8440   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
8441     /* Arg fits fully into registers.  */
8443   else if (cum->words + offset >= max_arg_words)
8444     /* Arg fully on the stack.  */
8448     return max_arg_words - cum->words - offset;
8452 /* Return 1 if this is a comparison operator.  This allows the use of
8453    MATCH_OPERATOR to recognize all the branch insns.  */
/* Accepts the comparison codes encodable by cmpib; note GEU and GTU/LEU
   asymmetry: GT/GTU/GE/LT/LE/LEU/EQ/NE are listed, GEU and LTU are not.  */
8456 cmpib_comparison_operator (op, mode)
8458      enum machine_mode mode;
8460   return ((mode == VOIDmode || GET_MODE (op) == mode)
8461 	  && (GET_CODE (op) == EQ
8462 	      || GET_CODE (op) == NE
8463 	      || GET_CODE (op) == GT
8464 	      || GET_CODE (op) == GTU
8465 	      || GET_CODE (op) == GE
8466 	      || GET_CODE (op) == LT
8467 	      || GET_CODE (op) == LE
8468 	      || GET_CODE (op) == LEU));
8471 /* On hpux10, the linker will give an error if we have a reference
8472    in the read-only data section to a symbol defined in a shared
8473    library.  Therefore, expressions that might require a reloc can
8474    not be placed in the read-only data section.  */
/* Implements the TARGET_ASM_SELECT_SECTION hook: switch to the
   read-only data section only for constant objects that cannot need
   a relocation; everything else falls through (elided lines presumably
   select data_section -- verify against full source).  */
8477 pa_select_section (exp, reloc, align)
8480      unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
8482   if (TREE_CODE (exp) == VAR_DECL
8483       && TREE_READONLY (exp)
8484       && !TREE_THIS_VOLATILE (exp)
8485       && DECL_INITIAL (exp)
8486       && (DECL_INITIAL (exp) == error_mark_node
8487 	  || TREE_CONSTANT (DECL_INITIAL (exp)))
8489     readonly_data_section ();
8490   else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
8491 	   && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
8493     readonly_data_section ();
/* Implements the TARGET_ASM_GLOBALIZE_LABEL hook: emit the SOM
   `.EXPORT name,DATA' directive for data symbols.  */
8499 pa_globalize_label (stream, name)
8503   /* We only handle DATA objects here, functions are globalized in
8504      ASM_DECLARE_FUNCTION_NAME.  */
8505   if (! FUNCTION_NAME_P (name))
8507       fputs ("\t.EXPORT ", stream);
8508       assemble_name (stream, name);
8509       fputs (",DATA\n", stream);