1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
42 #include "integrate.h"
51 #include "target-def.h"
53 static int hppa_use_dfa_pipeline_interface (void);
55 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
56 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
59 hppa_use_dfa_pipeline_interface (void)
64 /* Return nonzero if there is a bypass for the output of
65 OUT_INSN and the fp store IN_INSN. */
67 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
69 enum machine_mode store_mode;
70 enum machine_mode other_mode;
73 if (recog_memoized (in_insn) < 0
74 || get_attr_type (in_insn) != TYPE_FPSTORE
75 || recog_memoized (out_insn) < 0)
78 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
80 set = single_set (out_insn);
84 other_mode = GET_MODE (SET_SRC (set));
86 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
90 #ifndef DO_FRAME_NOTES
91 #ifdef INCOMING_RETURN_ADDR_RTX
92 #define DO_FRAME_NOTES 1
94 #define DO_FRAME_NOTES 0
98 static int hppa_address_cost (rtx);
99 static bool hppa_rtx_costs (rtx, int, int, int *);
100 static inline rtx force_mode (enum machine_mode, rtx);
101 static void pa_reorg (void);
102 static void pa_combine_instructions (void);
103 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
104 static int forward_branch_p (rtx);
105 static int shadd_constant_p (int);
106 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
107 static int compute_movstrsi_length (rtx);
108 static bool pa_assemble_integer (rtx, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, int, int);
111 static void store_reg_modify (int, int, int);
112 static void load_reg (int, int, int);
113 static void set_reg_plus_d (int, int, int, int);
114 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
115 static void update_total_code_bytes (int);
116 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static int pa_adjust_cost (rtx, rtx, rtx, int);
118 static int pa_adjust_priority (rtx, int);
119 static int pa_issue_rate (void);
120 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
122 static void pa_encode_section_info (tree, rtx, int);
123 static const char *pa_strip_name_encoding (const char *);
124 static bool pa_function_ok_for_sibcall (tree, tree);
125 static void pa_globalize_label (FILE *, const char *)
127 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree);
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor (rtx, int);
131 static void pa_asm_out_destructor (rtx, int);
133 static void pa_init_builtins (void);
134 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
135 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
136 static struct deferred_plabel *get_plabel (const char *)
138 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
139 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
140 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
141 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
142 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
144 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
145 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
147 static void output_deferred_plabels (void);
149 /* Save the operands last given to a compare for use when we
150 generate a scc or bcc insn. */
151 rtx hppa_compare_op0, hppa_compare_op1;
152 enum cmp_type hppa_branch_type;
154 /* Which cpu we are scheduling for. */
155 enum processor_type pa_cpu;
157 /* String to hold which cpu we are scheduling for. */
158 const char *pa_cpu_string;
160 /* Which architecture we are generating code for. */
161 enum architecture_type pa_arch;
163 /* String to hold which architecture we are generating code for. */
164 const char *pa_arch_string;
166 /* Counts for the number of callee-saved general and floating point
167 registers which were saved by the current function's prologue. */
168 static int gr_saved, fr_saved;
170 static rtx find_addr_reg (rtx);
172 /* Keep track of the number of bytes we have output in the CODE subspace
173 during this compilation so we'll know when to emit inline long-calls. */
174 unsigned long total_code_bytes;
176 /* The last address of the previous function plus the number of bytes in
177 associated thunks that have been output. This is used to determine if
178 a thunk can use an IA-relative branch to reach its target function. */
179 static int last_address;
181 /* Variables to handle plabels that we discover are necessary at assembly
182 output time. They are output after the current function. */
183 struct deferred_plabel GTY(())
188 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
190 static size_t n_deferred_plabels = 0;
193 /* Initialize the GCC target structure. */
195 #undef TARGET_ASM_ALIGNED_HI_OP
196 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
197 #undef TARGET_ASM_ALIGNED_SI_OP
198 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
199 #undef TARGET_ASM_ALIGNED_DI_OP
200 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
201 #undef TARGET_ASM_UNALIGNED_HI_OP
202 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
203 #undef TARGET_ASM_UNALIGNED_SI_OP
204 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
205 #undef TARGET_ASM_UNALIGNED_DI_OP
206 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
207 #undef TARGET_ASM_INTEGER
208 #define TARGET_ASM_INTEGER pa_assemble_integer
210 #undef TARGET_ASM_FUNCTION_PROLOGUE
211 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
212 #undef TARGET_ASM_FUNCTION_EPILOGUE
213 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
215 #undef TARGET_SCHED_ADJUST_COST
216 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
217 #undef TARGET_SCHED_ADJUST_PRIORITY
218 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
219 #undef TARGET_SCHED_ISSUE_RATE
220 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
222 #undef TARGET_ENCODE_SECTION_INFO
223 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
224 #undef TARGET_STRIP_NAME_ENCODING
225 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
227 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
228 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
230 #undef TARGET_ASM_OUTPUT_MI_THUNK
231 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
232 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
233 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
235 #undef TARGET_ASM_FILE_END
236 #define TARGET_ASM_FILE_END output_deferred_plabels
238 #if !defined(USE_COLLECT2)
239 #undef TARGET_ASM_CONSTRUCTOR
240 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
241 #undef TARGET_ASM_DESTRUCTOR
242 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
245 #undef TARGET_INIT_BUILTINS
246 #define TARGET_INIT_BUILTINS pa_init_builtins
248 #undef TARGET_RTX_COSTS
249 #define TARGET_RTX_COSTS hppa_rtx_costs
250 #undef TARGET_ADDRESS_COST
251 #define TARGET_ADDRESS_COST hppa_address_cost
253 #undef TARGET_MACHINE_DEPENDENT_REORG
254 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
256 struct gcc_target targetm = TARGET_INITIALIZER;
259 override_options (void)
261 if (pa_cpu_string == NULL)
262 pa_cpu_string = TARGET_SCHED_DEFAULT;
264 if (! strcmp (pa_cpu_string, "8000"))
266 pa_cpu_string = "8000";
267 pa_cpu = PROCESSOR_8000;
269 else if (! strcmp (pa_cpu_string, "7100"))
271 pa_cpu_string = "7100";
272 pa_cpu = PROCESSOR_7100;
274 else if (! strcmp (pa_cpu_string, "700"))
276 pa_cpu_string = "700";
277 pa_cpu = PROCESSOR_700;
279 else if (! strcmp (pa_cpu_string, "7100LC"))
281 pa_cpu_string = "7100LC";
282 pa_cpu = PROCESSOR_7100LC;
284 else if (! strcmp (pa_cpu_string, "7200"))
286 pa_cpu_string = "7200";
287 pa_cpu = PROCESSOR_7200;
289 else if (! strcmp (pa_cpu_string, "7300"))
291 pa_cpu_string = "7300";
292 pa_cpu = PROCESSOR_7300;
296 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
299 /* Set the instruction set architecture. */
300 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
302 pa_arch_string = "1.0";
303 pa_arch = ARCHITECTURE_10;
304 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
306 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
308 pa_arch_string = "1.1";
309 pa_arch = ARCHITECTURE_11;
310 target_flags &= ~MASK_PA_20;
311 target_flags |= MASK_PA_11;
313 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
315 pa_arch_string = "2.0";
316 pa_arch = ARCHITECTURE_20;
317 target_flags |= MASK_PA_11 | MASK_PA_20;
319 else if (pa_arch_string)
321 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
324 /* Unconditional branches in the delay slot are not compatible with dwarf2
325 call frame information. There is no benefit in using this optimization
326 on PA8000 and later processors. */
327 if (pa_cpu >= PROCESSOR_8000
328 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
329 || flag_unwind_tables)
330 target_flags &= ~MASK_JUMP_IN_DELAY;
332 if (flag_pic && TARGET_PORTABLE_RUNTIME)
334 warning ("PIC code generation is not supported in the portable runtime model\n");
337 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
339 warning ("PIC code generation is not compatible with fast indirect calls\n");
342 if (! TARGET_GAS && write_symbols != NO_DEBUG)
344 warning ("-g is only supported when using GAS on this processor,");
345 warning ("-g option disabled");
346 write_symbols = NO_DEBUG;
349 /* We only support the "big PIC" model now. And we always generate PIC
350 code when in 64bit mode. */
351 if (flag_pic == 1 || TARGET_64BIT)
354 /* We can't guarantee that .dword is available for 32-bit targets. */
355 if (UNITS_PER_WORD == 4)
356 targetm.asm_out.aligned_op.di = NULL;
358 /* The unaligned ops are only available when using GAS. */
361 targetm.asm_out.unaligned_op.hi = NULL;
362 targetm.asm_out.unaligned_op.si = NULL;
363 targetm.asm_out.unaligned_op.di = NULL;
368 pa_init_builtins (void)
370 #ifdef DONT_HAVE_FPUTC_UNLOCKED
371 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
372 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
376 /* Return nonzero only if OP is a register of mode MODE,
379 reg_or_0_operand (rtx op, enum machine_mode mode)
381 return (op == CONST0_RTX (mode) || register_operand (op, mode));
384 /* Return nonzero if OP is suitable for use in a call to a named
387 For 2.5 try to eliminate either call_operand_address or
388 function_label_operand, they perform very similar functions. */
390 call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
392 return (GET_MODE (op) == word_mode
393 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
396 /* Return 1 if X contains a symbolic expression. We know these
397 expressions will have one of a few well defined forms, so
398 we need only check those forms. */
400 symbolic_expression_p (rtx x)
403 /* Strip off any HIGH. */
404 if (GET_CODE (x) == HIGH)
407 return (symbolic_operand (x, VOIDmode));
411 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
413 switch (GET_CODE (op))
420 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
421 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
422 && GET_CODE (XEXP (op, 1)) == CONST_INT);
428 /* Return truth value of statement that OP is a symbolic memory
429 operand of mode MODE. */
432 symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
434 if (GET_CODE (op) == SUBREG)
435 op = SUBREG_REG (op);
436 if (GET_CODE (op) != MEM)
439 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
440 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
443 /* Return 1 if the operand is either a register or a memory operand that is
447 reg_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
449 if (register_operand (op, mode))
452 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
458 /* Return 1 if the operand is either a register, zero, or a memory operand
459 that is not symbolic. */
462 reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
464 if (register_operand (op, mode))
467 if (op == CONST0_RTX (mode))
470 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
476 /* Return 1 if the operand is a register operand or a non-symbolic memory
477 operand after reload. This predicate is used for branch patterns that
478 internally handle register reloading. We need to accept non-symbolic
479 memory operands after reload to ensure that the pattern is still valid
480 if reload didn't find a hard register for the operand. */
483 reg_before_reload_operand (rtx op, enum machine_mode mode)
485 /* Don't accept a SUBREG since it will need a reload. */
486 if (GET_CODE (op) == SUBREG)
489 if (register_operand (op, mode))
493 && memory_operand (op, mode)
494 && ! symbolic_memory_operand (op, mode))
500 /* Accept any constant that can be moved in one instruction into a
503 cint_ok_for_move (HOST_WIDE_INT intval)
505 /* OK if ldo, ldil, or zdepi, can be used. */
506 return (CONST_OK_FOR_LETTER_P (intval, 'J')
507 || CONST_OK_FOR_LETTER_P (intval, 'N')
508 || CONST_OK_FOR_LETTER_P (intval, 'K'));
511 /* Accept anything that can be moved in one instruction into a general
514 move_operand (rtx op, enum machine_mode mode)
516 if (register_operand (op, mode))
519 if (GET_CODE (op) == CONSTANT_P_RTX)
522 if (GET_CODE (op) == CONST_INT)
523 return cint_ok_for_move (INTVAL (op));
525 if (GET_CODE (op) == SUBREG)
526 op = SUBREG_REG (op);
527 if (GET_CODE (op) != MEM)
532 /* We consider a LO_SUM DLT reference a move_operand now since it has
533 been merged into the normal movsi/movdi patterns. */
534 if (GET_CODE (op) == LO_SUM
535 && GET_CODE (XEXP (op, 0)) == REG
536 && REG_OK_FOR_BASE_P (XEXP (op, 0))
537 && GET_CODE (XEXP (op, 1)) == UNSPEC
538 && GET_MODE (op) == Pmode)
541 /* Since move_operand is only used for source operands, we can always
542 allow scaled indexing! */
543 if (! TARGET_DISABLE_INDEXING
544 && GET_CODE (op) == PLUS
545 && ((GET_CODE (XEXP (op, 0)) == MULT
546 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
547 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
548 && INTVAL (XEXP (XEXP (op, 0), 1))
549 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
550 && GET_CODE (XEXP (op, 1)) == REG)
551 || (GET_CODE (XEXP (op, 1)) == MULT
552 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
553 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
554 && INTVAL (XEXP (XEXP (op, 1), 1))
555 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
556 && GET_CODE (XEXP (op, 0)) == REG)))
559 return memory_address_p (mode, op);
562 /* Accept REG and any CONST_INT that can be moved in one instruction into a
565 reg_or_cint_move_operand (rtx op, enum machine_mode mode)
567 if (register_operand (op, mode))
570 if (GET_CODE (op) == CONST_INT)
571 return cint_ok_for_move (INTVAL (op));
577 pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
582 switch (GET_CODE (op))
588 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
589 && GET_CODE (XEXP (op, 1)) == CONST_INT);
596 fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
598 return reg_renumber && FP_REG_P (op);
603 /* Return truth value of whether OP can be used as an operand in a
604 three operand arithmetic insn that accepts registers of mode MODE
605 or 14-bit signed integers. */
607 arith_operand (rtx op, enum machine_mode mode)
609 return (register_operand (op, mode)
610 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
613 /* Return truth value of whether OP can be used as an operand in a
614 three operand arithmetic insn that accepts registers of mode MODE
615 or 11-bit signed integers. */
617 arith11_operand (rtx op, enum machine_mode mode)
619 return (register_operand (op, mode)
620 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
623 /* Return truth value of whether OP can be used as an operand in a
626 adddi3_operand (rtx op, enum machine_mode mode)
628 return (register_operand (op, mode)
629 || (GET_CODE (op) == CONST_INT
630 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
633 /* A constant integer suitable for use in a PRE_MODIFY memory
636 pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
638 return (GET_CODE (op) == CONST_INT
639 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
642 /* A constant integer suitable for use in a POST_MODIFY memory
645 post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
647 return (GET_CODE (op) == CONST_INT
648 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
652 arith_double_operand (rtx op, enum machine_mode mode)
654 return (register_operand (op, mode)
655 || (GET_CODE (op) == CONST_DOUBLE
656 && GET_MODE (op) == mode
657 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
658 && ((CONST_DOUBLE_HIGH (op) >= 0)
659 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
662 /* Return truth value of whether OP is an integer which fits the
663 range constraining immediate operands in three-address insns, or
664 is an integer register. */
667 ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
669 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
670 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
673 /* Return nonzero if OP is an integer register, else return zero. */
675 ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
677 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
680 /* Return truth value of whether OP is an integer which fits the
681 range constraining immediate operands in three-address insns. */
684 int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
686 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
690 uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
692 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
696 int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
698 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
702 uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
704 #if HOST_BITS_PER_WIDE_INT > 32
705 /* All allowed constants will fit a CONST_INT. */
706 return (GET_CODE (op) == CONST_INT
707 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
709 return (GET_CODE (op) == CONST_INT
710 || (GET_CODE (op) == CONST_DOUBLE
711 && CONST_DOUBLE_HIGH (op) == 0));
716 arith5_operand (rtx op, enum machine_mode mode)
718 return register_operand (op, mode) || int5_operand (op, mode);
721 /* True iff zdepi can be used to generate this CONST_INT.
722 zdepi first sign extends a 5 bit signed number to a given field
723 length, then places this field anywhere in a zero. */
725 zdepi_cint_p (unsigned HOST_WIDE_INT x)
727 unsigned HOST_WIDE_INT lsb_mask, t;
729 /* This might not be obvious, but it's at least fast.
730 This function is critical; we don't have the time loops would take. */
732 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
733 /* Return true iff t is a power of two. */
734 return ((t & (t - 1)) == 0);
737 /* True iff depi or extru can be used to compute (reg & mask).
738 Accept bit pattern like these:
743 and_mask_p (unsigned HOST_WIDE_INT mask)
746 mask += mask & -mask;
747 return (mask & (mask - 1)) == 0;
750 /* True iff depi or extru can be used to compute (reg & OP). */
752 and_operand (rtx op, enum machine_mode mode)
754 return (register_operand (op, mode)
755 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
758 /* True iff depi can be used to compute (reg | MASK). */
760 ior_mask_p (unsigned HOST_WIDE_INT mask)
762 mask += mask & -mask;
763 return (mask & (mask - 1)) == 0;
766 /* True iff depi can be used to compute (reg | OP). */
768 ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
770 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
774 lhs_lshift_operand (rtx op, enum machine_mode mode)
776 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
779 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
780 Such values can be the left hand side x in (x << r), using the zvdepi
783 lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
785 unsigned HOST_WIDE_INT x;
786 if (GET_CODE (op) != CONST_INT)
788 x = INTVAL (op) >> 4;
789 return (x & (x + 1)) == 0;
793 arith32_operand (rtx op, enum machine_mode mode)
795 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
799 pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
801 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
804 /* Legitimize PIC addresses. If the address is already
805 position-independent, we return ORIG. Newly generated
806 position-independent addresses go to REG. If we need more
807 than one register, we lose. */
810 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
814 /* Labels need special handling. */
815 if (pic_label_operand (orig, mode))
817 /* We do not want to go through the movXX expanders here since that
818 would create recursion.
820 Nor do we really want to call a generator for a named pattern
821 since that requires multiple patterns if we want to support
824 So instead we just emit the raw set, which avoids the movXX
825 expanders completely. */
826 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
827 current_function_uses_pic_offset_table = 1;
830 if (GET_CODE (orig) == SYMBOL_REF)
836 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
837 gen_rtx_HIGH (word_mode, orig)));
839 = gen_rtx_MEM (Pmode,
840 gen_rtx_LO_SUM (Pmode, reg,
841 gen_rtx_UNSPEC (Pmode,
845 current_function_uses_pic_offset_table = 1;
846 RTX_UNCHANGING_P (pic_ref) = 1;
847 emit_move_insn (reg, pic_ref);
850 else if (GET_CODE (orig) == CONST)
854 if (GET_CODE (XEXP (orig, 0)) == PLUS
855 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
861 if (GET_CODE (XEXP (orig, 0)) == PLUS)
863 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
864 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
865 base == reg ? 0 : reg);
868 if (GET_CODE (orig) == CONST_INT)
870 if (INT_14_BITS (orig))
871 return plus_constant (base, INTVAL (orig));
872 orig = force_reg (Pmode, orig);
874 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
875 /* Likewise, should we set special REG_NOTEs here? */
880 /* Try machine-dependent ways of modifying an illegitimate address
881 to be legitimate. If we find one, return the new, valid address.
882 This macro is used in only one place: `memory_address' in explow.c.
884 OLDX is the address as it was before break_out_memory_refs was called.
885 In some cases it is useful to look at this to decide what needs to be done.
887 MODE and WIN are passed so that this macro can use
888 GO_IF_LEGITIMATE_ADDRESS.
890 It is always safe for this macro to do nothing. It exists to recognize
891 opportunities to optimize the output.
893 For the PA, transform:
895 memory(X + <large int>)
899 if (<large int> & mask) >= 16
900 Y = (<large int> & ~mask) + mask + 1 Round up.
902 Y = (<large int> & ~mask) Round down.
904 memory (Z + (<large int> - Y));
906 This is for CSE to find several similar references, and only use one Z.
908 X can either be a SYMBOL_REF or REG, but because combine can not
909 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
910 D will not fit in 14 bits.
912 MODE_FLOAT references allow displacements which fit in 5 bits, so use
915 MODE_INT references allow displacements which fit in 14 bits, so use
918 This relies on the fact that most mode MODE_FLOAT references will use FP
919 registers and most mode MODE_INT references will use integer registers.
920 (In the rare case of an FP register used in an integer MODE, we depend
921 on secondary reloads to clean things up.)
924 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
925 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
926 addressing modes to be used).
928 Put X and Z into registers. Then put the entire expression into
932 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
933 enum machine_mode mode)
938 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
940 /* Strip off CONST. */
941 if (GET_CODE (x) == CONST)
944 /* Special case. Get the SYMBOL_REF into a register and use indexing.
945 That should always be safe. */
946 if (GET_CODE (x) == PLUS
947 && GET_CODE (XEXP (x, 0)) == REG
948 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
950 rtx reg = force_reg (Pmode, XEXP (x, 1));
951 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
954 /* Note we must reject symbols which represent function addresses
955 since the assembler/linker can't handle arithmetic on plabels. */
956 if (GET_CODE (x) == PLUS
957 && GET_CODE (XEXP (x, 1)) == CONST_INT
958 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
959 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
960 || GET_CODE (XEXP (x, 0)) == REG))
962 rtx int_part, ptr_reg;
964 int offset = INTVAL (XEXP (x, 1));
967 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
968 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
970 /* Choose which way to round the offset. Round up if we
971 are >= halfway to the next boundary. */
972 if ((offset & mask) >= ((mask + 1) / 2))
973 newoffset = (offset & ~ mask) + mask + 1;
975 newoffset = (offset & ~ mask);
977 /* If the newoffset will not fit in 14 bits (ldo), then
978 handling this would take 4 or 5 instructions (2 to load
979 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
980 add the new offset and the SYMBOL_REF.) Combine can
981 not handle 4->2 or 5->2 combinations, so do not create
983 if (! VAL_14_BITS_P (newoffset)
984 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
986 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
989 gen_rtx_HIGH (Pmode, const_part));
992 gen_rtx_LO_SUM (Pmode,
993 tmp_reg, const_part));
997 if (! VAL_14_BITS_P (newoffset))
998 int_part = force_reg (Pmode, GEN_INT (newoffset));
1000 int_part = GEN_INT (newoffset);
1002 ptr_reg = force_reg (Pmode,
1003 gen_rtx_PLUS (Pmode,
1004 force_reg (Pmode, XEXP (x, 0)),
1007 return plus_constant (ptr_reg, offset - newoffset);
1010 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1012 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1013 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1014 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1015 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1016 || GET_CODE (XEXP (x, 1)) == SUBREG)
1017 && GET_CODE (XEXP (x, 1)) != CONST)
1019 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1023 if (GET_CODE (reg1) != REG)
1024 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1026 reg2 = XEXP (XEXP (x, 0), 0);
1027 if (GET_CODE (reg2) != REG)
1028 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1030 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1031 gen_rtx_MULT (Pmode,
1037 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1039 Only do so for floating point modes since this is more speculative
1040 and we lose if it's an integer store. */
1041 if (GET_CODE (x) == PLUS
1042 && GET_CODE (XEXP (x, 0)) == PLUS
1043 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1044 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1045 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1046 && (mode == SFmode || mode == DFmode))
1049 /* First, try and figure out what to use as a base register. */
1050 rtx reg1, reg2, base, idx, orig_base;
1052 reg1 = XEXP (XEXP (x, 0), 1);
1057 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1058 then emit_move_sequence will turn on REG_POINTER so we'll know
1059 it's a base register below. */
1060 if (GET_CODE (reg1) != REG)
1061 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1063 if (GET_CODE (reg2) != REG)
1064 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1066 /* Figure out what the base and index are. */
1068 if (GET_CODE (reg1) == REG
1069 && REG_POINTER (reg1))
1072 orig_base = XEXP (XEXP (x, 0), 1);
1073 idx = gen_rtx_PLUS (Pmode,
1074 gen_rtx_MULT (Pmode,
1075 XEXP (XEXP (XEXP (x, 0), 0), 0),
1076 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1079 else if (GET_CODE (reg2) == REG
1080 && REG_POINTER (reg2))
1083 orig_base = XEXP (x, 1);
1090 /* If the index adds a large constant, try to scale the
1091 constant so that it can be loaded with only one insn. */
1092 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1093 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1094 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1095 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1097 /* Divide the CONST_INT by the scale factor, then add it to A. */
1098 int val = INTVAL (XEXP (idx, 1));
1100 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1101 reg1 = XEXP (XEXP (idx, 0), 0);
1102 if (GET_CODE (reg1) != REG)
1103 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1105 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1107 /* We can now generate a simple scaled indexed address. */
1110 (Pmode, gen_rtx_PLUS (Pmode,
1111 gen_rtx_MULT (Pmode, reg1,
1112 XEXP (XEXP (idx, 0), 1)),
1116 /* If B + C is still a valid base register, then add them. */
1117 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1118 && INTVAL (XEXP (idx, 1)) <= 4096
1119 && INTVAL (XEXP (idx, 1)) >= -4096)
1121 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1124 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1126 reg2 = XEXP (XEXP (idx, 0), 0);
1127 if (GET_CODE (reg2) != CONST_INT)
1128 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1130 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1131 gen_rtx_MULT (Pmode,
1137 /* Get the index into a register, then add the base + index and
1138 return a register holding the result. */
1140 /* First get A into a register. */
1141 reg1 = XEXP (XEXP (idx, 0), 0);
1142 if (GET_CODE (reg1) != REG)
1143 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1145 /* And get B into a register. */
1146 reg2 = XEXP (idx, 1);
1147 if (GET_CODE (reg2) != REG)
1148 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1150 reg1 = force_reg (Pmode,
1151 gen_rtx_PLUS (Pmode,
1152 gen_rtx_MULT (Pmode, reg1,
1153 XEXP (XEXP (idx, 0), 1)),
1156 /* Add the result to our base register and return. */
1157 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1161 /* Uh-oh. We might have an address for x[n-100000]. This needs
1162 special handling to avoid creating an indexed memory address
1163 with x-100000 as the base.
1165 If the constant part is small enough, then it's still safe because
1166 there is a guard page at the beginning and end of the data segment.
1168 Scaled references are common enough that we want to try and rearrange the
1169 terms so that we can use indexing for these addresses too. Only
1170 do the optimization for floatint point modes. */
1172 if (GET_CODE (x) == PLUS
1173 && symbolic_expression_p (XEXP (x, 1)))
1175 /* Ugly. We modify things here so that the address offset specified
1176 by the index expression is computed first, then added to x to form
1177 the entire address. */
1179 rtx regx1, regx2, regy1, regy2, y;
1181 /* Strip off any CONST. */
1183 if (GET_CODE (y) == CONST)
1186 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1188 /* See if this looks like
1189 (plus (mult (reg) (shadd_const))
1190 (const (plus (symbol_ref) (const_int))))
1192 Where const_int is small. In that case the const
1193 expression is a valid pointer for indexing.
1195 If const_int is big, but can be divided evenly by shadd_const
1196 and added to (reg). This allows more scaled indexed addresses. */
1197 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1198 && GET_CODE (XEXP (x, 0)) == MULT
1199 && GET_CODE (XEXP (y, 1)) == CONST_INT
1200 && INTVAL (XEXP (y, 1)) >= -4096
1201 && INTVAL (XEXP (y, 1)) <= 4095
1202 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1203 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1205 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1209 if (GET_CODE (reg1) != REG)
1210 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1212 reg2 = XEXP (XEXP (x, 0), 0);
1213 if (GET_CODE (reg2) != REG)
1214 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1216 return force_reg (Pmode,
1217 gen_rtx_PLUS (Pmode,
1218 gen_rtx_MULT (Pmode,
1223 else if ((mode == DFmode || mode == SFmode)
1224 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1225 && GET_CODE (XEXP (x, 0)) == MULT
1226 && GET_CODE (XEXP (y, 1)) == CONST_INT
1227 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1228 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1229 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1232 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1233 / INTVAL (XEXP (XEXP (x, 0), 1))));
1234 regx2 = XEXP (XEXP (x, 0), 0);
1235 if (GET_CODE (regx2) != REG)
1236 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1237 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1241 gen_rtx_PLUS (Pmode,
1242 gen_rtx_MULT (Pmode, regx2,
1243 XEXP (XEXP (x, 0), 1)),
1244 force_reg (Pmode, XEXP (y, 0))));
1246 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1247 && INTVAL (XEXP (y, 1)) >= -4096
1248 && INTVAL (XEXP (y, 1)) <= 4095)
1250 /* This is safe because of the guard page at the
1251 beginning and end of the data space. Just
1252 return the original address. */
1257 /* Doesn't look like one we can optimize. */
1258 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1259 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1260 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1261 regx1 = force_reg (Pmode,
1262 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1264 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1272 /* For the HPPA, REG and REG+CONST is cost 0
1273 and addresses involving symbolic constants are cost 2.
1275 PIC addresses are very expensive.
1277 It is no coincidence that this has the same structure
1278 as GO_IF_LEGITIMATE_ADDRESS. */
1281 hppa_address_cost (rtx X)
1283 switch (GET_CODE (X))
1296 /* Compute a (partial) cost for rtx X. Return true if the complete
1297 cost has been computed, and false if subexpressions should be
1298 scanned. In either case, *TOTAL contains the cost result. */
1301 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1306 if (INTVAL (x) == 0)
1308 else if (INT_14_BITS (x))
1325 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1326 && outer_code != SET)
1333 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1334 *total = COSTS_N_INSNS (3);
1335 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1336 *total = COSTS_N_INSNS (8);
1338 *total = COSTS_N_INSNS (20);
1342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1344 *total = COSTS_N_INSNS (14);
1352 *total = COSTS_N_INSNS (60);
1355 case PLUS: /* this includes shNadd insns */
1357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1358 *total = COSTS_N_INSNS (3);
1360 *total = COSTS_N_INSNS (1);
1366 *total = COSTS_N_INSNS (1);
1374 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1375 new rtx with the correct mode. */
1377 force_mode (enum machine_mode mode, rtx orig)
1379 if (mode == GET_MODE (orig))
1382 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1385 return gen_rtx_REG (mode, REGNO (orig));
1388 /* Emit insns to move operands[1] into operands[0].
1390 Return 1 if we have written out everything that needs to be done to
1391 do the move. Otherwise, return 0 and the caller will emit the move
1394 Note SCRATCH_REG may not be in the proper mode depending on how it
1395 will be used. This routine is responsible for creating a new copy
1396 of SCRATCH_REG in the proper mode. */
1399 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1401 register rtx operand0 = operands[0];
1402 register rtx operand1 = operands[1];
1406 && reload_in_progress && GET_CODE (operand0) == REG
1407 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1408 operand0 = reg_equiv_mem[REGNO (operand0)];
1409 else if (scratch_reg
1410 && reload_in_progress && GET_CODE (operand0) == SUBREG
1411 && GET_CODE (SUBREG_REG (operand0)) == REG
1412 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1414 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1415 the code which tracks sets/uses for delete_output_reload. */
1416 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1417 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1418 SUBREG_BYTE (operand0));
1419 operand0 = alter_subreg (&temp);
1423 && reload_in_progress && GET_CODE (operand1) == REG
1424 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1425 operand1 = reg_equiv_mem[REGNO (operand1)];
1426 else if (scratch_reg
1427 && reload_in_progress && GET_CODE (operand1) == SUBREG
1428 && GET_CODE (SUBREG_REG (operand1)) == REG
1429 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1431 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1432 the code which tracks sets/uses for delete_output_reload. */
1433 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1434 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1435 SUBREG_BYTE (operand1));
1436 operand1 = alter_subreg (&temp);
1439 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1440 && ((tem = find_replacement (&XEXP (operand0, 0)))
1441 != XEXP (operand0, 0)))
1442 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1443 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1444 && ((tem = find_replacement (&XEXP (operand1, 0)))
1445 != XEXP (operand1, 0)))
1446 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1448 /* Handle secondary reloads for loads/stores of FP registers from
1449 REG+D addresses where D does not fit in 5 bits, including
1450 (subreg (mem (addr))) cases. */
1451 if (fp_reg_operand (operand0, mode)
1452 && ((GET_CODE (operand1) == MEM
1453 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1454 || ((GET_CODE (operand1) == SUBREG
1455 && GET_CODE (XEXP (operand1, 0)) == MEM
1456 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1459 if (GET_CODE (operand1) == SUBREG)
1460 operand1 = XEXP (operand1, 0);
1462 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1463 it in WORD_MODE regardless of what mode it was originally given
1465 scratch_reg = force_mode (word_mode, scratch_reg);
1467 /* D might not fit in 14 bits either; for such cases load D into
1469 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1471 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1472 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1474 XEXP (XEXP (operand1, 0), 0),
1478 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1479 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1480 gen_rtx_MEM (mode, scratch_reg)));
1483 else if (fp_reg_operand (operand1, mode)
1484 && ((GET_CODE (operand0) == MEM
1485 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1486 || ((GET_CODE (operand0) == SUBREG)
1487 && GET_CODE (XEXP (operand0, 0)) == MEM
1488 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1491 if (GET_CODE (operand0) == SUBREG)
1492 operand0 = XEXP (operand0, 0);
1494 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1495 it in WORD_MODE regardless of what mode it was originally given
1497 scratch_reg = force_mode (word_mode, scratch_reg);
1499 /* D might not fit in 14 bits either; for such cases load D into
1501 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1503 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1504 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1507 XEXP (XEXP (operand0, 0),
1512 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1513 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1517 /* Handle secondary reloads for loads of FP registers from constant
1518 expressions by forcing the constant into memory.
1520 use scratch_reg to hold the address of the memory location.
1522 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1523 NO_REGS when presented with a const_int and a register class
1524 containing only FP registers. Doing so unfortunately creates
1525 more problems than it solves. Fix this for 2.5. */
1526 else if (fp_reg_operand (operand0, mode)
1527 && CONSTANT_P (operand1)
1532 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1533 it in WORD_MODE regardless of what mode it was originally given
1535 scratch_reg = force_mode (word_mode, scratch_reg);
1537 /* Force the constant into memory and put the address of the
1538 memory location into scratch_reg. */
1539 xoperands[0] = scratch_reg;
1540 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1541 emit_move_sequence (xoperands, Pmode, 0);
1543 /* Now load the destination register. */
1544 emit_insn (gen_rtx_SET (mode, operand0,
1545 gen_rtx_MEM (mode, scratch_reg)));
1548 /* Handle secondary reloads for SAR. These occur when trying to load
1549 the SAR from memory, FP register, or with a constant. */
1550 else if (GET_CODE (operand0) == REG
1551 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1552 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1553 && (GET_CODE (operand1) == MEM
1554 || GET_CODE (operand1) == CONST_INT
1555 || (GET_CODE (operand1) == REG
1556 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1559 /* D might not fit in 14 bits either; for such cases load D into
1561 if (GET_CODE (operand1) == MEM
1562 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1564 /* We are reloading the address into the scratch register, so we
1565 want to make sure the scratch register is a full register. */
1566 scratch_reg = force_mode (word_mode, scratch_reg);
1568 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1569 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1572 XEXP (XEXP (operand1, 0),
1576 /* Now we are going to load the scratch register from memory,
1577 we want to load it in the same width as the original MEM,
1578 which must be the same as the width of the ultimate destination,
1580 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1582 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1587 /* We want to load the scratch register using the same mode as
1588 the ultimate destination. */
1589 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1591 emit_move_insn (scratch_reg, operand1);
1594 /* And emit the insn to set the ultimate destination. We know that
1595 the scratch register has the same mode as the destination at this
1597 emit_move_insn (operand0, scratch_reg);
1600 /* Handle most common case: storing into a register. */
1601 else if (register_operand (operand0, mode))
1603 if (register_operand (operand1, mode)
1604 || (GET_CODE (operand1) == CONST_INT
1605 && cint_ok_for_move (INTVAL (operand1)))
1606 || (operand1 == CONST0_RTX (mode))
1607 || (GET_CODE (operand1) == HIGH
1608 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1609 /* Only `general_operands' can come here, so MEM is ok. */
1610 || GET_CODE (operand1) == MEM)
1612 /* Run this case quickly. */
1613 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1617 else if (GET_CODE (operand0) == MEM)
1619 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1620 && !(reload_in_progress || reload_completed))
1622 rtx temp = gen_reg_rtx (DFmode);
1624 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1625 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1628 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1630 /* Run this case quickly. */
1631 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1634 if (! (reload_in_progress || reload_completed))
1636 operands[0] = validize_mem (operand0);
1637 operands[1] = operand1 = force_reg (mode, operand1);
1641 /* Simplify the source if we need to.
1642 Note we do have to handle function labels here, even though we do
1643 not consider them legitimate constants. Loop optimizations can
1644 call the emit_move_xxx with one as a source. */
1645 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1646 || function_label_operand (operand1, mode)
1647 || (GET_CODE (operand1) == HIGH
1648 && symbolic_operand (XEXP (operand1, 0), mode)))
1652 if (GET_CODE (operand1) == HIGH)
1655 operand1 = XEXP (operand1, 0);
1657 if (symbolic_operand (operand1, mode))
1659 /* Argh. The assembler and linker can't handle arithmetic
1662 So we force the plabel into memory, load operand0 from
1663 the memory location, then add in the constant part. */
1664 if ((GET_CODE (operand1) == CONST
1665 && GET_CODE (XEXP (operand1, 0)) == PLUS
1666 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1667 || function_label_operand (operand1, mode))
1669 rtx temp, const_part;
1671 /* Figure out what (if any) scratch register to use. */
1672 if (reload_in_progress || reload_completed)
1674 scratch_reg = scratch_reg ? scratch_reg : operand0;
1675 /* SCRATCH_REG will hold an address and maybe the actual
1676 data. We want it in WORD_MODE regardless of what mode it
1677 was originally given to us. */
1678 scratch_reg = force_mode (word_mode, scratch_reg);
1681 scratch_reg = gen_reg_rtx (Pmode);
1683 if (GET_CODE (operand1) == CONST)
1685 /* Save away the constant part of the expression. */
1686 const_part = XEXP (XEXP (operand1, 0), 1);
1687 if (GET_CODE (const_part) != CONST_INT)
1690 /* Force the function label into memory. */
1691 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1695 /* No constant part. */
1696 const_part = NULL_RTX;
1698 /* Force the function label into memory. */
1699 temp = force_const_mem (mode, operand1);
1703 /* Get the address of the memory location. PIC-ify it if
1705 temp = XEXP (temp, 0);
1707 temp = legitimize_pic_address (temp, mode, scratch_reg);
1709 /* Put the address of the memory location into our destination
1712 emit_move_sequence (operands, mode, scratch_reg);
1714 /* Now load from the memory location into our destination
1716 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1717 emit_move_sequence (operands, mode, scratch_reg);
1719 /* And add back in the constant part. */
1720 if (const_part != NULL_RTX)
1721 expand_inc (operand0, const_part);
1730 if (reload_in_progress || reload_completed)
1732 temp = scratch_reg ? scratch_reg : operand0;
1733 /* TEMP will hold an address and maybe the actual
1734 data. We want it in WORD_MODE regardless of what mode it
1735 was originally given to us. */
1736 temp = force_mode (word_mode, temp);
1739 temp = gen_reg_rtx (Pmode);
1741 /* (const (plus (symbol) (const_int))) must be forced to
1742 memory during/after reload if the const_int will not fit
1744 if (GET_CODE (operand1) == CONST
1745 && GET_CODE (XEXP (operand1, 0)) == PLUS
1746 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1747 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1748 && (reload_completed || reload_in_progress)
1751 operands[1] = force_const_mem (mode, operand1);
1752 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1754 emit_move_sequence (operands, mode, temp);
1758 operands[1] = legitimize_pic_address (operand1, mode, temp);
1759 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1762 /* On the HPPA, references to data space are supposed to use dp,
1763 register 27, but showing it in the RTL inhibits various cse
1764 and loop optimizations. */
1769 if (reload_in_progress || reload_completed)
1771 temp = scratch_reg ? scratch_reg : operand0;
1772 /* TEMP will hold an address and maybe the actual
1773 data. We want it in WORD_MODE regardless of what mode it
1774 was originally given to us. */
1775 temp = force_mode (word_mode, temp);
1778 temp = gen_reg_rtx (mode);
1780 /* Loading a SYMBOL_REF into a register makes that register
1781 safe to be used as the base in an indexed address.
1783 Don't mark hard registers though. That loses. */
1784 if (GET_CODE (operand0) == REG
1785 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1786 REG_POINTER (operand0) = 1;
1787 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1788 REG_POINTER (temp) = 1;
1790 set = gen_rtx_SET (mode, operand0, temp);
1792 set = gen_rtx_SET (VOIDmode,
1794 gen_rtx_LO_SUM (mode, temp, operand1));
1796 emit_insn (gen_rtx_SET (VOIDmode,
1798 gen_rtx_HIGH (mode, operand1)));
1804 else if (GET_CODE (operand1) != CONST_INT
1805 || ! cint_ok_for_move (INTVAL (operand1)))
1807 rtx extend = NULL_RTX;
1810 if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1811 && HOST_BITS_PER_WIDE_INT > 32
1812 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1814 HOST_WIDE_INT val = INTVAL (operand1);
1817 /* Extract the low order 32 bits of the value and sign extend.
1818 If the new value is the same as the original value, we can
1819 can use the original value as-is. If the new value is
1820 different, we use it and insert the most-significant 32-bits
1821 of the original value into the final result. */
1822 nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1823 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1826 #if HOST_BITS_PER_WIDE_INT > 32
1827 extend = GEN_INT (val >> 32);
1829 operand1 = GEN_INT (nval);
1833 if (reload_in_progress || reload_completed)
1836 temp = gen_reg_rtx (mode);
1838 /* We don't directly split DImode constants on 32-bit targets
1839 because PLUS uses an 11-bit immediate and the insn sequence
1840 generated is not as efficient as the one using HIGH/LO_SUM. */
1841 if (GET_CODE (operand1) == CONST_INT
1842 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
1844 /* Directly break constant into high and low parts. This
1845 provides better optimization opportunities because various
1846 passes recognize constants split with PLUS but not LO_SUM.
1847 We use a 14-bit signed low part except when the addition
1848 of 0x4000 to the high part might change the sign of the
1850 HOST_WIDE_INT value = INTVAL (operand1);
1851 HOST_WIDE_INT low = value & 0x3fff;
1852 HOST_WIDE_INT high = value & ~ 0x3fff;
1856 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1864 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1865 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1869 emit_insn (gen_rtx_SET (VOIDmode, temp,
1870 gen_rtx_HIGH (mode, operand1)));
1871 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1874 emit_move_insn (operands[0], operands[1]);
1876 if (extend != NULL_RTX)
1877 emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1883 /* Now have insn-emit do whatever it normally does. */
1887 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1888 it will need a link/runtime reloc). */
1891 reloc_needed (tree exp)
1895 switch (TREE_CODE (exp))
1902 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1903 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1908 case NON_LVALUE_EXPR:
1909 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1915 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1916 if (TREE_VALUE (link) != 0)
1917 reloc |= reloc_needed (TREE_VALUE (link));
1930 /* Does operand (which is a symbolic_operand) live in text space?
1931 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
1935 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
1937 if (GET_CODE (operand) == CONST)
1938 operand = XEXP (XEXP (operand, 0), 0);
1941 if (GET_CODE (operand) == SYMBOL_REF)
1942 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1946 if (GET_CODE (operand) == SYMBOL_REF)
1947 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1953 /* Return the best assembler insn template
1954 for moving operands[1] into operands[0] as a fullword. */
1956 singlemove_string (rtx *operands)
1958 HOST_WIDE_INT intval;
1960 if (GET_CODE (operands[0]) == MEM)
1961 return "stw %r1,%0";
1962 if (GET_CODE (operands[1]) == MEM)
1964 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1969 if (GET_MODE (operands[1]) != SFmode)
1972 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1974 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1975 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1977 operands[1] = GEN_INT (i);
1978 /* Fall through to CONST_INT case. */
1980 if (GET_CODE (operands[1]) == CONST_INT)
1982 intval = INTVAL (operands[1]);
1984 if (VAL_14_BITS_P (intval))
1986 else if ((intval & 0x7ff) == 0)
1987 return "ldil L'%1,%0";
1988 else if (zdepi_cint_p (intval))
1989 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
1991 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1993 return "copy %1,%0";
1997 /* Compute position (in OP[1]) and width (in OP[2])
1998 useful for copying IMM to a register using the zdepi
1999 instructions. Store the immediate value to insert in OP[0]. */
2001 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2005 /* Find the least significant set bit in IMM. */
2006 for (lsb = 0; lsb < 32; lsb++)
2013 /* Choose variants based on *sign* of the 5-bit field. */
2014 if ((imm & 0x10) == 0)
2015 len = (lsb <= 28) ? 4 : 32 - lsb;
2018 /* Find the width of the bitstring in IMM. */
2019 for (len = 5; len < 32; len++)
2021 if ((imm & (1 << len)) == 0)
2025 /* Sign extend IMM as a 5-bit value. */
2026 imm = (imm & 0xf) - 0x10;
2034 /* Compute position (in OP[1]) and width (in OP[2])
2035 useful for copying IMM to a register using the depdi,z
2036 instructions. Store the immediate value to insert in OP[0]. */
2038 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2040 HOST_WIDE_INT lsb, len;
2042 /* Find the least significant set bit in IMM. */
2043 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2050 /* Choose variants based on *sign* of the 5-bit field. */
2051 if ((imm & 0x10) == 0)
2052 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2053 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2056 /* Find the width of the bitstring in IMM. */
2057 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2059 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2063 /* Sign extend IMM as a 5-bit value. */
2064 imm = (imm & 0xf) - 0x10;
2072 /* Output assembler code to perform a doubleword move insn
2073 with operands OPERANDS. */
2076 output_move_double (rtx *operands)
2078 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2080 rtx addreg0 = 0, addreg1 = 0;
2082 /* First classify both operands. */
2084 if (REG_P (operands[0]))
2086 else if (offsettable_memref_p (operands[0]))
2088 else if (GET_CODE (operands[0]) == MEM)
2093 if (REG_P (operands[1]))
2095 else if (CONSTANT_P (operands[1]))
2097 else if (offsettable_memref_p (operands[1]))
2099 else if (GET_CODE (operands[1]) == MEM)
2104 /* Check for the cases that the operand constraints are not
2105 supposed to allow to happen. Abort if we get one,
2106 because generating code for these cases is painful. */
2108 if (optype0 != REGOP && optype1 != REGOP)
2111 /* Handle auto decrementing and incrementing loads and stores
2112 specifically, since the structure of the function doesn't work
2113 for them without major modification. Do it better when we learn
2114 this port about the general inc/dec addressing of PA.
2115 (This was written by tege. Chide him if it doesn't work.) */
2117 if (optype0 == MEMOP)
2119 /* We have to output the address syntax ourselves, since print_operand
2120 doesn't deal with the addresses we want to use. Fix this later. */
2122 rtx addr = XEXP (operands[0], 0);
2123 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2125 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2127 operands[0] = XEXP (addr, 0);
2128 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2131 if (!reg_overlap_mentioned_p (high_reg, addr))
2133 /* No overlap between high target register and address
2134 register. (We do this in a non-obvious way to
2135 save a register file writeback) */
2136 if (GET_CODE (addr) == POST_INC)
2137 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2138 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2143 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2145 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2147 operands[0] = XEXP (addr, 0);
2148 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2151 if (!reg_overlap_mentioned_p (high_reg, addr))
2153 /* No overlap between high target register and address
2154 register. (We do this in a non-obvious way to
2155 save a register file writeback) */
2156 if (GET_CODE (addr) == PRE_INC)
2157 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2158 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2164 if (optype1 == MEMOP)
2166 /* We have to output the address syntax ourselves, since print_operand
2167 doesn't deal with the addresses we want to use. Fix this later. */
2169 rtx addr = XEXP (operands[1], 0);
2170 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2172 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2174 operands[1] = XEXP (addr, 0);
2175 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2178 if (!reg_overlap_mentioned_p (high_reg, addr))
2180 /* No overlap between high target register and address
2181 register. (We do this in a non-obvious way to
2182 save a register file writeback) */
2183 if (GET_CODE (addr) == POST_INC)
2184 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2185 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2189 /* This is an undefined situation. We should load into the
2190 address register *and* update that register. Probably
2191 we don't need to handle this at all. */
2192 if (GET_CODE (addr) == POST_INC)
2193 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2194 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2197 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2199 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2201 operands[1] = XEXP (addr, 0);
2202 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2205 if (!reg_overlap_mentioned_p (high_reg, addr))
2207 /* No overlap between high target register and address
2208 register. (We do this in a non-obvious way to
2209 save a register file writeback) */
2210 if (GET_CODE (addr) == PRE_INC)
2211 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2212 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2216 /* This is an undefined situation. We should load into the
2217 address register *and* update that register. Probably
2218 we don't need to handle this at all. */
2219 if (GET_CODE (addr) == PRE_INC)
2220 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2221 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2224 else if (GET_CODE (addr) == PLUS
2225 && GET_CODE (XEXP (addr, 0)) == MULT)
2227 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2229 if (!reg_overlap_mentioned_p (high_reg, addr))
2233 xoperands[0] = high_reg;
2234 xoperands[1] = XEXP (addr, 1);
2235 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2236 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2237 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2239 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2245 xoperands[0] = high_reg;
2246 xoperands[1] = XEXP (addr, 1);
2247 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2248 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2249 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2251 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2256 /* If an operand is an unoffsettable memory ref, find a register
2257 we can increment temporarily to make it refer to the second word. */
2259 if (optype0 == MEMOP)
2260 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2262 if (optype1 == MEMOP)
2263 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2265 /* Ok, we can do one word at a time.
2266 Normally we do the low-numbered word first.
2268 In either case, set up in LATEHALF the operands to use
2269 for the high-numbered word and in some cases alter the
2270 operands in OPERANDS to be suitable for the low-numbered word. */
2272 if (optype0 == REGOP)
2273 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2274 else if (optype0 == OFFSOP)
2275 latehalf[0] = adjust_address (operands[0], SImode, 4);
2277 latehalf[0] = operands[0];
2279 if (optype1 == REGOP)
2280 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2281 else if (optype1 == OFFSOP)
2282 latehalf[1] = adjust_address (operands[1], SImode, 4);
2283 else if (optype1 == CNSTOP)
2284 split_double (operands[1], &operands[1], &latehalf[1]);
2286 latehalf[1] = operands[1];
2288 /* If the first move would clobber the source of the second one,
2289 do them in the other order.
2291 This can happen in two cases:
2293 mem -> register where the first half of the destination register
2294 is the same register used in the memory's address. Reload
2295 can create such insns.
2297 mem in this case will be either register indirect or register
2298 indirect plus a valid offset.
2300 register -> register move where REGNO(dst) == REGNO(src + 1)
2301 someone (Tim/Tege?) claimed this can happen for parameter loads.
2303 Handle mem -> register case first. */
2304 if (optype0 == REGOP
2305 && (optype1 == MEMOP || optype1 == OFFSOP)
2306 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2309 /* Do the late half first. */
2311 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2312 output_asm_insn (singlemove_string (latehalf), latehalf);
2316 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2317 return singlemove_string (operands);
2320 /* Now handle register -> register case. */
2321 if (optype0 == REGOP && optype1 == REGOP
2322 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2324 output_asm_insn (singlemove_string (latehalf), latehalf);
2325 return singlemove_string (operands);
2328 /* Normal case: do the two words, low-numbered first. */
2330 output_asm_insn (singlemove_string (operands), operands);
2332 /* Make any unoffsettable addresses point at high-numbered word. */
2334 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2336 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2339 output_asm_insn (singlemove_string (latehalf), latehalf);
2341 /* Undo the adds we just did. */
2343 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2345 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2351 output_fp_move_double (rtx *operands)
2353 if (FP_REG_P (operands[0]))
2355 if (FP_REG_P (operands[1])
2356 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2357 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2359 output_asm_insn ("fldd%F1 %1,%0", operands);
2361 else if (FP_REG_P (operands[1]))
2363 output_asm_insn ("fstd%F0 %1,%0", operands);
2365 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2367 if (GET_CODE (operands[0]) == REG)
2370 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2371 xoperands[0] = operands[0];
2372 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2374 /* This is a pain. You have to be prepared to deal with an
2375 arbitrary address here including pre/post increment/decrement.
2377 so avoid this in the MD. */
2385 /* Return a REG that occurs in ADDR with coefficient 1.
2386 ADDR can be effectively incremented by incrementing REG. */
2389 find_addr_reg (rtx addr)
2391 while (GET_CODE (addr) == PLUS)
2393 if (GET_CODE (XEXP (addr, 0)) == REG)
2394 addr = XEXP (addr, 0);
2395 else if (GET_CODE (XEXP (addr, 1)) == REG)
2396 addr = XEXP (addr, 1);
2397 else if (CONSTANT_P (XEXP (addr, 0)))
2398 addr = XEXP (addr, 1);
2399 else if (CONSTANT_P (XEXP (addr, 1)))
2400 addr = XEXP (addr, 0);
2404 if (GET_CODE (addr) == REG)
2409 /* Emit code to perform a block move.
2411 OPERANDS[0] is the destination pointer as a REG, clobbered.
2412 OPERANDS[1] is the source pointer as a REG, clobbered.
2413 OPERANDS[2] is a register for temporary storage.
2414 OPERANDS[4] is the size as a CONST_INT
2415 OPERANDS[3] is a register for temporary storage.
2416 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2417 OPERANDS[6] is another temporary register. */
2420 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2422 int align = INTVAL (operands[5]);
2423 unsigned long n_bytes = INTVAL (operands[4]);
2425 /* We can't move more than four bytes at a time because the PA
2426 has no longer integer move insns. (Could use fp mem ops?) */
2430 /* Note that we know each loop below will execute at least twice
2431 (else we would have open-coded the copy). */
2435 /* Pre-adjust the loop counter. */
2436 operands[4] = GEN_INT (n_bytes - 8);
2437 output_asm_insn ("ldi %4,%2", operands);
2440 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2441 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2442 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2443 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2444 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2446 /* Handle the residual. There could be up to 7 bytes of
2447 residual to copy! */
2448 if (n_bytes % 8 != 0)
2450 operands[4] = GEN_INT (n_bytes % 4);
2451 if (n_bytes % 8 >= 4)
2452 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2453 if (n_bytes % 4 != 0)
2454 output_asm_insn ("ldw 0(%1),%6", operands);
2455 if (n_bytes % 8 >= 4)
2456 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2457 if (n_bytes % 4 != 0)
2458 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2463 /* Pre-adjust the loop counter. */
2464 operands[4] = GEN_INT (n_bytes - 4);
2465 output_asm_insn ("ldi %4,%2", operands);
2468 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2469 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2470 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2471 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2472 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2474 /* Handle the residual. */
2475 if (n_bytes % 4 != 0)
2477 if (n_bytes % 4 >= 2)
2478 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2479 if (n_bytes % 2 != 0)
2480 output_asm_insn ("ldb 0(%1),%6", operands);
2481 if (n_bytes % 4 >= 2)
2482 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2483 if (n_bytes % 2 != 0)
2484 output_asm_insn ("stb %6,0(%0)", operands);
2489 /* Pre-adjust the loop counter. */
2490 operands[4] = GEN_INT (n_bytes - 2);
2491 output_asm_insn ("ldi %4,%2", operands);
2494 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2495 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2496 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2497 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2498 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2500 /* Handle the residual. */
2501 if (n_bytes % 2 != 0)
2503 output_asm_insn ("ldb 0(%1),%3", operands);
2504 output_asm_insn ("stb %3,0(%0)", operands);
2513 /* Count the number of insns necessary to handle this block move.
2515 Basic structure is the same as emit_block_move, except that we
2516 count insns rather than emit them. */
2519 compute_movstrsi_length (rtx insn)
2521 rtx pat = PATTERN (insn);
2522 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2523 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2524 unsigned int n_insns = 0;
2526 /* We can't move more than four bytes at a time because the PA
2527 has no longer integer move insns. (Could use fp mem ops?) */
2531 /* The basic copying loop. */
2535 if (n_bytes % (2 * align) != 0)
2537 if ((n_bytes % (2 * align)) >= align)
2540 if ((n_bytes % align) != 0)
2544 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2550 output_and (rtx *operands)
2552 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2554 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2555 int ls0, ls1, ms0, p, len;
2557 for (ls0 = 0; ls0 < 32; ls0++)
2558 if ((mask & (1 << ls0)) == 0)
2561 for (ls1 = ls0; ls1 < 32; ls1++)
2562 if ((mask & (1 << ls1)) != 0)
2565 for (ms0 = ls1; ms0 < 32; ms0++)
2566 if ((mask & (1 << ms0)) == 0)
2579 operands[2] = GEN_INT (len);
2580 return "{extru|extrw,u} %1,31,%2,%0";
2584 /* We could use this `depi' for the case above as well, but `depi'
2585 requires one more register file access than an `extru'. */
2590 operands[2] = GEN_INT (p);
2591 operands[3] = GEN_INT (len);
2592 return "{depi|depwi} 0,%2,%3,%0";
2596 return "and %1,%2,%0";
2599 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2600 storing the result in operands[0]. */
2602 output_64bit_and (rtx *operands)
2604 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2606 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2607 int ls0, ls1, ms0, p, len;
2609 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2610 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2613 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2614 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2617 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2618 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2621 if (ms0 != HOST_BITS_PER_WIDE_INT)
2624 if (ls1 == HOST_BITS_PER_WIDE_INT)
2631 operands[2] = GEN_INT (len);
2632 return "extrd,u %1,63,%2,%0";
2636 /* We could use this `depi' for the case above as well, but `depi'
2637 requires one more register file access than an `extru'. */
2642 operands[2] = GEN_INT (p);
2643 operands[3] = GEN_INT (len);
2644 return "depdi 0,%2,%3,%0";
2648 return "and %1,%2,%0";
2652 output_ior (rtx *operands)
2654 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2655 int bs0, bs1, p, len;
2657 if (INTVAL (operands[2]) == 0)
2658 return "copy %1,%0";
2660 for (bs0 = 0; bs0 < 32; bs0++)
2661 if ((mask & (1 << bs0)) != 0)
2664 for (bs1 = bs0; bs1 < 32; bs1++)
2665 if ((mask & (1 << bs1)) == 0)
2668 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2674 operands[2] = GEN_INT (p);
2675 operands[3] = GEN_INT (len);
2676 return "{depi|depwi} -1,%2,%3,%0";
2679 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2680 storing the result in operands[0]. */
2682 output_64bit_ior (rtx *operands)
2684 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2685 int bs0, bs1, p, len;
2687 if (INTVAL (operands[2]) == 0)
2688 return "copy %1,%0";
2690 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2691 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2694 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2695 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2698 if (bs1 != HOST_BITS_PER_WIDE_INT
2699 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2705 operands[2] = GEN_INT (p);
2706 operands[3] = GEN_INT (len);
2707 return "depdi -1,%2,%3,%0";
2710 /* Target hook for assembling integer objects. This code handles
2711 aligned SI and DI integers specially, since function references must
2712 be preceded by P%. */
2715 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
2717 if (size == UNITS_PER_WORD && aligned_p
2718 && function_label_operand (x, VOIDmode))
2720 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2721 output_addr_const (asm_out_file, x);
2722 fputc ('\n', asm_out_file);
2725 return default_assemble_integer (x, size, aligned_p);
2728 /* Output an ascii string. */
2730 output_ascii (FILE *file, const char *p, int size)
2734 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2736 /* The HP assembler can only take strings of 256 characters at one
2737 time. This is a limitation on input line length, *not* the
2738 length of the string. Sigh. Even worse, it seems that the
2739 restriction is in number of input characters (see \xnn &
2740 \whatever). So we have to do this very carefully. */
2742 fputs ("\t.STRING \"", file);
2745 for (i = 0; i < size; i += 4)
2749 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2751 register unsigned int c = (unsigned char) p[i + io];
2753 if (c == '\"' || c == '\\')
2754 partial_output[co++] = '\\';
2755 if (c >= ' ' && c < 0177)
2756 partial_output[co++] = c;
2760 partial_output[co++] = '\\';
2761 partial_output[co++] = 'x';
2762 hexd = c / 16 - 0 + '0';
2764 hexd -= '9' - 'a' + 1;
2765 partial_output[co++] = hexd;
2766 hexd = c % 16 - 0 + '0';
2768 hexd -= '9' - 'a' + 1;
2769 partial_output[co++] = hexd;
2772 if (chars_output + co > 243)
2774 fputs ("\"\n\t.STRING \"", file);
2777 fwrite (partial_output, 1, (size_t) co, file);
2781 fputs ("\"\n", file);
2784 /* Try to rewrite floating point comparisons & branches to avoid
2785 useless add,tr insns.
2787 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2788 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2789 first attempt to remove useless add,tr insns. It is zero
2790 for the second pass as reorg sometimes leaves bogus REG_DEAD
2793 When CHECK_NOTES is zero we can only eliminate add,tr insns
2794 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2797 remove_useless_addtr_insns (int check_notes)
2800 static int pass = 0;
2802 /* This is fairly cheap, so always run it when optimizing. */
2806 int fbranch_count = 0;
2808 /* Walk all the insns in this function looking for fcmp & fbranch
2809 instructions. Keep track of how many of each we find. */
2810 for (insn = get_insns (); insn; insn = next_insn (insn))
2814 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2815 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2818 tmp = PATTERN (insn);
2820 /* It must be a set. */
2821 if (GET_CODE (tmp) != SET)
2824 /* If the destination is CCFP, then we've found an fcmp insn. */
2825 tmp = SET_DEST (tmp);
2826 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2832 tmp = PATTERN (insn);
2833 /* If this is an fbranch instruction, bump the fbranch counter. */
2834 if (GET_CODE (tmp) == SET
2835 && SET_DEST (tmp) == pc_rtx
2836 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2837 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2838 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2839 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2847 /* Find all floating point compare + branch insns. If possible,
2848 reverse the comparison & the branch to avoid add,tr insns. */
2849 for (insn = get_insns (); insn; insn = next_insn (insn))
2853 /* Ignore anything that isn't an INSN. */
2854 if (GET_CODE (insn) != INSN)
2857 tmp = PATTERN (insn);
2859 /* It must be a set. */
2860 if (GET_CODE (tmp) != SET)
2863 /* The destination must be CCFP, which is register zero. */
2864 tmp = SET_DEST (tmp);
2865 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2868 /* INSN should be a set of CCFP.
2870 See if the result of this insn is used in a reversed FP
2871 conditional branch. If so, reverse our condition and
2872 the branch. Doing so avoids useless add,tr insns. */
2873 next = next_insn (insn);
2876 /* Jumps, calls and labels stop our search. */
2877 if (GET_CODE (next) == JUMP_INSN
2878 || GET_CODE (next) == CALL_INSN
2879 || GET_CODE (next) == CODE_LABEL)
2882 /* As does another fcmp insn. */
2883 if (GET_CODE (next) == INSN
2884 && GET_CODE (PATTERN (next)) == SET
2885 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2886 && REGNO (SET_DEST (PATTERN (next))) == 0)
2889 next = next_insn (next);
2892 /* Is NEXT_INSN a branch? */
2894 && GET_CODE (next) == JUMP_INSN)
2896 rtx pattern = PATTERN (next);
2898 /* If it a reversed fp conditional branch (eg uses add,tr)
2899 and CCFP dies, then reverse our conditional and the branch
2900 to avoid the add,tr. */
2901 if (GET_CODE (pattern) == SET
2902 && SET_DEST (pattern) == pc_rtx
2903 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2904 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2905 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2906 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2907 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2908 && (fcmp_count == fbranch_count
2910 && find_regno_note (next, REG_DEAD, 0))))
2912 /* Reverse the branch. */
2913 tmp = XEXP (SET_SRC (pattern), 1);
2914 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2915 XEXP (SET_SRC (pattern), 2) = tmp;
2916 INSN_CODE (next) = -1;
2918 /* Reverse our condition. */
2919 tmp = PATTERN (insn);
2920 PUT_CODE (XEXP (tmp, 1),
2921 (reverse_condition_maybe_unordered
2922 (GET_CODE (XEXP (tmp, 1)))));
2932 /* You may have trouble believing this, but this is the 32 bit HP-PA
2937 Variable arguments (optional; any number may be allocated)
2939 SP-(4*(N+9)) arg word N
2944 Fixed arguments (must be allocated; may remain unused)
2953 SP-32 External Data Pointer (DP)
2955 SP-24 External/stub RP (RP')
2959 SP-8 Calling Stub RP (RP'')
2964 SP-0 Stack Pointer (points to next available address)
2968 /* This function saves registers as follows. Registers marked with ' are
2969 this function's registers (as opposed to the previous function's).
2970 If a frame_pointer isn't needed, r4 is saved as a general register;
2971 the space for the frame pointer is still allocated, though, to keep
2977 SP (FP') Previous FP
2978 SP + 4 Alignment filler (sigh)
2979 SP + 8 Space for locals reserved here.
2983 SP + n All call saved register used.
2987 SP + o All call saved fp registers used.
2991 SP + p (SP') points to next available address.
2995 /* Global variables set by output_function_prologue(). */
2996 /* Size of frame. Need to know this to emit return insns from
2998 static int actual_fsize;
2999 static int local_fsize, save_fregs;
3001 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3002 Handle case where DISP > 8k by using the add_high_const patterns.
3004 Note in DISP > 8k case, we will leave the high part of the address
3005 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3008 store_reg (int reg, int disp, int base)
3010 rtx insn, dest, src, basereg;
3012 src = gen_rtx_REG (word_mode, reg);
3013 basereg = gen_rtx_REG (Pmode, base);
3014 if (VAL_14_BITS_P (disp))
3016 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3017 insn = emit_move_insn (dest, src);
3021 rtx delta = GEN_INT (disp);
3022 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3023 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3024 emit_move_insn (tmpreg, high);
3025 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3026 insn = emit_move_insn (dest, src);
3030 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3031 gen_rtx_SET (VOIDmode,
3032 gen_rtx_MEM (word_mode,
3033 gen_rtx_PLUS (word_mode, basereg,
3041 RTX_FRAME_RELATED_P (insn) = 1;
3044 /* Emit RTL to store REG at the memory location specified by BASE and then
3045 add MOD to BASE. MOD must be <= 8k. */
3048 store_reg_modify (int base, int reg, int mod)
3050 rtx insn, basereg, srcreg, delta;
3052 if (! VAL_14_BITS_P (mod))
3055 basereg = gen_rtx_REG (Pmode, base);
3056 srcreg = gen_rtx_REG (word_mode, reg);
3057 delta = GEN_INT (mod);
3059 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3062 RTX_FRAME_RELATED_P (insn) = 1;
3064 /* RTX_FRAME_RELATED_P must be set on each frame related set
3065 in a parallel with more than one element. Don't set
3066 RTX_FRAME_RELATED_P in the first set if reg is temporary
3067 register 1. The effect of this operation is recorded in
3068 the initial copy. */
3071 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3072 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3076 /* The first element of a PARALLEL is always processed if it is
3077 a SET. Thus, we need an expression list for this case. */
3079 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3080 gen_rtx_SET (VOIDmode, basereg,
3081 gen_rtx_PLUS (word_mode, basereg, delta)),
3087 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3088 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3089 whether to add a frame note or not.
3091 In the DISP > 8k case, we leave the high part of the address in %r1.
3092 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3095 set_reg_plus_d (int reg, int base, int disp, int note)
3099 if (VAL_14_BITS_P (disp))
3101 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3102 plus_constant (gen_rtx_REG (Pmode, base), disp));
3106 rtx basereg = gen_rtx_REG (Pmode, base);
3107 rtx delta = GEN_INT (disp);
3109 emit_move_insn (gen_rtx_REG (Pmode, 1),
3110 gen_rtx_PLUS (Pmode, basereg,
3111 gen_rtx_HIGH (Pmode, delta)));
3112 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3113 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3117 if (DO_FRAME_NOTES && note)
3118 RTX_FRAME_RELATED_P (insn) = 1;
3122 compute_frame_size (int size, int *fregs_live)
3127 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3128 be consistent with the rounding and size calculation done here.
3129 Change them at the same time. */
3131 /* We do our own stack alignment. First, round the size of the
3132 stack locals up to a word boundary. */
3133 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3135 /* Space for previous frame pointer + filler. If any frame is
3136 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3137 waste some space here for the sake of HP compatibility. The
3138 first slot is only used when the frame pointer is needed. */
3139 if (size || frame_pointer_needed)
3140 size += STARTING_FRAME_OFFSET;
3142 /* If the current function calls __builtin_eh_return, then we need
3143 to allocate stack space for registers that will hold data for
3144 the exception handler. */
3145 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3149 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3151 size += i * UNITS_PER_WORD;
3154 /* Account for space used by the callee general register saves. */
3155 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3156 if (regs_ever_live[i])
3157 size += UNITS_PER_WORD;
3159 /* Account for space used by the callee floating point register saves. */
3160 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3161 if (regs_ever_live[i]
3162 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3166 /* We always save both halves of the FP register, so always
3167 increment the frame size by 8 bytes. */
3171 /* If any of the floating registers are saved, account for the
3172 alignment needed for the floating point register save block. */
3175 size = (size + 7) & ~7;
3180 /* The various ABIs include space for the outgoing parameters in the
3181 size of the current function's stack frame. We don't need to align
3182 for the outgoing arguments as their alignment is set by the final
3183 rounding for the frame as a whole. */
3184 size += current_function_outgoing_args_size;
3186 /* Allocate space for the fixed frame marker. This space must be
3187 allocated for any function that makes calls or allocates
3189 if (!current_function_is_leaf || size)
3190 size += TARGET_64BIT ? 48 : 32;
3192 /* Finally, round to the preferred stack boundary. */
3193 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3194 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3197 /* Generate the assembly code for function entry. FILE is a stdio
3198 stream to output the code to. SIZE is an int: how many units of
3199 temporary storage to allocate.
3201 Refer to the array `regs_ever_live' to determine which registers to
3202 save; `regs_ever_live[I]' is nonzero if register number I is ever
3203 used in the function. This function is responsible for knowing
3204 which registers should not be saved even if used. */
3206 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3207 of memory. If any fpu reg is used in the function, we allocate
3208 such a block here, at the bottom of the frame, just in case it's needed.
3210 If this function is a leaf procedure, then we may choose not
3211 to do a "save" insn. The decision about whether or not
3212 to do this is made in regclass.c. */
3215 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3217 /* The function's label and associated .PROC must never be
3218 separated and must be output *after* any profiling declarations
3219 to avoid changing spaces/subspaces within a procedure. */
3220 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3221 fputs ("\t.PROC\n", file);
3223 /* hppa_expand_prologue does the dirty work now. We just need
3224 to output the assembler directives which denote the start
3226 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3227 if (regs_ever_live[2])
3228 fputs (",CALLS,SAVE_RP", file);
3230 fputs (",NO_CALLS", file);
3232 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3233 at the beginning of the frame and that it is used as the frame
3234 pointer for the frame. We do this because our current frame
3235 layout doesn't conform to that specified in the the HP runtime
3236 documentation and we need a way to indicate to programs such as
3237 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3238 isn't used by HP compilers but is supported by the assembler.
3239 However, SAVE_SP is supposed to indicate that the previous stack
3240 pointer has been saved in the frame marker. */
3241 if (frame_pointer_needed)
3242 fputs (",SAVE_SP", file);
3244 /* Pass on information about the number of callee register saves
3245 performed in the prologue.
3247 The compiler is supposed to pass the highest register number
3248 saved, the assembler then has to adjust that number before
3249 entering it into the unwind descriptor (to account for any
3250 caller saved registers with lower register numbers than the
3251 first callee saved register). */
3253 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3256 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3258 fputs ("\n\t.ENTRY\n", file);
3260 remove_useless_addtr_insns (0);
3264 hppa_expand_prologue (void)
3266 int merge_sp_adjust_with_store = 0;
3267 int size = get_frame_size ();
3275 /* Compute total size for frame pointer, filler, locals and rounding to
3276 the next word boundary. Similar code appears in compute_frame_size
3277 and must be changed in tandem with this code. */
3278 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3279 if (local_fsize || frame_pointer_needed)
3280 local_fsize += STARTING_FRAME_OFFSET;
3282 actual_fsize = compute_frame_size (size, &save_fregs);
3284 /* Compute a few things we will use often. */
3285 tmpreg = gen_rtx_REG (word_mode, 1);
3287 /* Save RP first. The calling conventions manual states RP will
3288 always be stored into the caller's frame at sp - 20 or sp - 16
3289 depending on which ABI is in use. */
3290 if (regs_ever_live[2] || current_function_calls_eh_return)
3291 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3293 /* Allocate the local frame and set up the frame pointer if needed. */
3294 if (actual_fsize != 0)
3296 if (frame_pointer_needed)
3298 /* Copy the old frame pointer temporarily into %r1. Set up the
3299 new stack pointer, then store away the saved old frame pointer
3300 into the stack at sp and at the same time update the stack
3301 pointer by actual_fsize bytes. Two versions, first
3302 handles small (<8k) frames. The second handles large (>=8k)
3304 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3307 /* We need to record the frame pointer save here since the
3308 new frame pointer is set in the following insn. */
3309 RTX_FRAME_RELATED_P (insn) = 1;
3311 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3312 gen_rtx_SET (VOIDmode,
3313 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3318 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3320 RTX_FRAME_RELATED_P (insn) = 1;
3322 if (VAL_14_BITS_P (actual_fsize))
3323 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3326 /* It is incorrect to store the saved frame pointer at *sp,
3327 then increment sp (writes beyond the current stack boundary).
3329 So instead use stwm to store at *sp and post-increment the
3330 stack pointer as an atomic operation. Then increment sp to
3331 finish allocating the new frame. */
3332 int adjust1 = 8192 - 64;
3333 int adjust2 = actual_fsize - adjust1;
3335 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3336 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3340 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3341 we need to store the previous stack pointer (frame pointer)
3342 into the frame marker on targets that use the HP unwind
3343 library. This allows the HP unwind library to be used to
3344 unwind GCC frames. However, we are not fully compatible
3345 with the HP library because our frame layout differs from
3346 that specified in the HP runtime specification.
3348 We don't want a frame note on this instruction as the frame
3349 marker moves during dynamic stack allocation.
3351 This instruction also serves as a blockage to prevent
3352 register spills from being scheduled before the stack
3353 pointer is raised. This is necessary as we store
3354 registers using the frame pointer as a base register,
3355 and the frame pointer is set before sp is raised. */
3356 if (TARGET_HPUX_UNWIND_LIBRARY)
3358 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3359 GEN_INT (TARGET_64BIT ? -8 : -4));
3361 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3365 emit_insn (gen_blockage ());
3367 /* no frame pointer needed. */
3370 /* In some cases we can perform the first callee register save
3371 and allocating the stack frame at the same time. If so, just
3372 make a note of it and defer allocating the frame until saving
3373 the callee registers. */
3374 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3375 merge_sp_adjust_with_store = 1;
3376 /* Can not optimize. Adjust the stack frame by actual_fsize
3379 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3384 /* Normal register save.
3386 Do not save the frame pointer in the frame_pointer_needed case. It
3387 was done earlier. */
3388 if (frame_pointer_needed)
3390 offset = local_fsize;
3392 /* Saving the EH return data registers in the frame is the simplest
3393 way to get the frame unwind information emitted. We put them
3394 just before the general registers. */
3395 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3397 unsigned int i, regno;
3401 regno = EH_RETURN_DATA_REGNO (i);
3402 if (regno == INVALID_REGNUM)
3405 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3406 offset += UNITS_PER_WORD;
3410 for (i = 18; i >= 4; i--)
3411 if (regs_ever_live[i] && ! call_used_regs[i])
3413 store_reg (i, offset, FRAME_POINTER_REGNUM);
3414 offset += UNITS_PER_WORD;
3417 /* Account for %r3 which is saved in a special place. */
3420 /* No frame pointer needed. */
3423 offset = local_fsize - actual_fsize;
3425 /* Saving the EH return data registers in the frame is the simplest
3426 way to get the frame unwind information emitted. */
3427 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3429 unsigned int i, regno;
3433 regno = EH_RETURN_DATA_REGNO (i);
3434 if (regno == INVALID_REGNUM)
3437 /* If merge_sp_adjust_with_store is nonzero, then we can
3438 optimize the first save. */
3439 if (merge_sp_adjust_with_store)
3441 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3442 merge_sp_adjust_with_store = 0;
3445 store_reg (regno, offset, STACK_POINTER_REGNUM);
3446 offset += UNITS_PER_WORD;
3450 for (i = 18; i >= 3; i--)
3451 if (regs_ever_live[i] && ! call_used_regs[i])
3453 /* If merge_sp_adjust_with_store is nonzero, then we can
3454 optimize the first GR save. */
3455 if (merge_sp_adjust_with_store)
3457 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3458 merge_sp_adjust_with_store = 0;
3461 store_reg (i, offset, STACK_POINTER_REGNUM);
3462 offset += UNITS_PER_WORD;
3466 /* If we wanted to merge the SP adjustment with a GR save, but we never
3467 did any GR saves, then just emit the adjustment here. */
3468 if (merge_sp_adjust_with_store)
3469 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3473 /* The hppa calling conventions say that %r19, the pic offset
3474 register, is saved at sp - 32 (in this function's frame)
3475 when generating PIC code. FIXME: What is the correct thing
3476 to do for functions which make no calls and allocate no
3477 frame? Do we need to allocate a frame, or can we just omit
3478 the save? For now we'll just omit the save.
3480 We don't want a note on this insn as the frame marker can
3481 move if there is a dynamic stack allocation. */
3482 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3484 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3486 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3490 /* Align pointer properly (doubleword boundary). */
3491 offset = (offset + 7) & ~7;
3493 /* Floating point register store. */
3498 /* First get the frame or stack pointer to the start of the FP register
3500 if (frame_pointer_needed)
3502 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3503 base = frame_pointer_rtx;
3507 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3508 base = stack_pointer_rtx;
3511 /* Now actually save the FP registers. */
3512 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3514 if (regs_ever_live[i]
3515 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3517 rtx addr, insn, reg;
3518 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3519 reg = gen_rtx_REG (DFmode, i);
3520 insn = emit_move_insn (addr, reg);
3523 RTX_FRAME_RELATED_P (insn) = 1;
3526 rtx mem = gen_rtx_MEM (DFmode,
3527 plus_constant (base, offset));
3529 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3530 gen_rtx_SET (VOIDmode, mem, reg),
3535 rtx meml = gen_rtx_MEM (SFmode,
3536 plus_constant (base, offset));
3537 rtx memr = gen_rtx_MEM (SFmode,
3538 plus_constant (base, offset + 4));
3539 rtx regl = gen_rtx_REG (SFmode, i);
3540 rtx regr = gen_rtx_REG (SFmode, i + 1);
3541 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3542 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3545 RTX_FRAME_RELATED_P (setl) = 1;
3546 RTX_FRAME_RELATED_P (setr) = 1;
3547 vec = gen_rtvec (2, setl, setr);
3549 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3550 gen_rtx_SEQUENCE (VOIDmode, vec),
3554 offset += GET_MODE_SIZE (DFmode);
3561 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3562 Handle case where DISP > 8k by using the add_high_const patterns. */
3565 load_reg (int reg, int disp, int base)
3567 rtx src, dest, basereg;
3569 dest = gen_rtx_REG (word_mode, reg);
3570 basereg = gen_rtx_REG (Pmode, base);
3571 if (VAL_14_BITS_P (disp))
3573 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3574 emit_move_insn (dest, src);
3578 rtx delta = GEN_INT (disp);
3579 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3580 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3581 emit_move_insn (tmpreg, high);
3582 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3583 emit_move_insn (dest, src);
3587 /* Update the total code bytes output to the text section. */
3590 update_total_code_bytes (int nbytes)
3592 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3593 && !IN_NAMED_SECTION_P (cfun->decl))
3595 if (INSN_ADDRESSES_SET_P ())
3597 unsigned long old_total = total_code_bytes;
3599 total_code_bytes += nbytes;
3601 /* Be prepared to handle overflows. */
3602 if (old_total > total_code_bytes)
3603 total_code_bytes = -1;
3606 total_code_bytes = -1;
3610 /* This function generates the assembly code for function exit.
3611 Args are as for output_function_prologue ().
3613 The function epilogue should not depend on the current stack
3614 pointer! It should use the frame pointer only. This is mandatory
3615 because of alloca; we also take advantage of it to omit stack
3616 adjustments before returning. */
3619 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3621 rtx insn = get_last_insn ();
3625 /* hppa_expand_epilogue does the dirty work now. We just need
3626 to output the assembler directives which denote the end
3629 To make debuggers happy, emit a nop if the epilogue was completely
3630 eliminated due to a volatile call as the last insn in the
3631 current function. That way the return address (in %r2) will
3632 always point to a valid instruction in the current function. */
3634 /* Get the last real insn. */
3635 if (GET_CODE (insn) == NOTE)
3636 insn = prev_real_insn (insn);
3638 /* If it is a sequence, then look inside. */
3639 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3640 insn = XVECEXP (PATTERN (insn), 0, 0);
3642 /* If insn is a CALL_INSN, then it must be a call to a volatile
3643 function (otherwise there would be epilogue insns). */
3644 if (insn && GET_CODE (insn) == CALL_INSN)
3646 fputs ("\tnop\n", file);
3650 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3652 if (INSN_ADDRESSES_SET_P ())
3654 insn = get_last_nonnote_insn ();
3655 last_address += INSN_ADDRESSES (INSN_UID (insn));
3657 last_address += insn_default_length (insn);
3658 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3659 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3662 /* Finally, update the total number of code bytes output so far. */
3663 update_total_code_bytes (last_address);
3667 hppa_expand_epilogue (void)
3671 int merge_sp_adjust_with_load = 0;
3674 /* We will use this often. */
3675 tmpreg = gen_rtx_REG (word_mode, 1);
3677 /* Try to restore RP early to avoid load/use interlocks when
3678 RP gets used in the return (bv) instruction. This appears to still
3679 be necessary even when we schedule the prologue and epilogue. */
3680 if (regs_ever_live [2] || current_function_calls_eh_return)
3682 ret_off = TARGET_64BIT ? -16 : -20;
3683 if (frame_pointer_needed)
3685 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3690 /* No frame pointer, and stack is smaller than 8k. */
3691 if (VAL_14_BITS_P (ret_off - actual_fsize))
3693 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3699 /* General register restores. */
3700 if (frame_pointer_needed)
3702 offset = local_fsize;
3704 /* If the current function calls __builtin_eh_return, then we need
3705 to restore the saved EH data registers. */
3706 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3708 unsigned int i, regno;
3712 regno = EH_RETURN_DATA_REGNO (i);
3713 if (regno == INVALID_REGNUM)
3716 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3717 offset += UNITS_PER_WORD;
3721 for (i = 18; i >= 4; i--)
3722 if (regs_ever_live[i] && ! call_used_regs[i])
3724 load_reg (i, offset, FRAME_POINTER_REGNUM);
3725 offset += UNITS_PER_WORD;
3730 offset = local_fsize - actual_fsize;
3732 /* If the current function calls __builtin_eh_return, then we need
3733 to restore the saved EH data registers. */
3734 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3736 unsigned int i, regno;
3740 regno = EH_RETURN_DATA_REGNO (i);
3741 if (regno == INVALID_REGNUM)
3744 /* Only for the first load.
3745 merge_sp_adjust_with_load holds the register load
3746 with which we will merge the sp adjustment. */
3747 if (merge_sp_adjust_with_load == 0
3749 && VAL_14_BITS_P (-actual_fsize))
3750 merge_sp_adjust_with_load = regno;
3752 load_reg (regno, offset, STACK_POINTER_REGNUM);
3753 offset += UNITS_PER_WORD;
3757 for (i = 18; i >= 3; i--)
3759 if (regs_ever_live[i] && ! call_used_regs[i])
3761 /* Only for the first load.
3762 merge_sp_adjust_with_load holds the register load
3763 with which we will merge the sp adjustment. */
3764 if (merge_sp_adjust_with_load == 0
3766 && VAL_14_BITS_P (-actual_fsize))
3767 merge_sp_adjust_with_load = i;
3769 load_reg (i, offset, STACK_POINTER_REGNUM);
3770 offset += UNITS_PER_WORD;
3775 /* Align pointer properly (doubleword boundary). */
3776 offset = (offset + 7) & ~7;
3778 /* FP register restores. */
3781 /* Adjust the register to index off of. */
3782 if (frame_pointer_needed)
3783 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3785 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3787 /* Actually do the restores now. */
3788 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3789 if (regs_ever_live[i]
3790 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3792 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3793 rtx dest = gen_rtx_REG (DFmode, i);
3794 emit_move_insn (dest, src);
3798 /* Emit a blockage insn here to keep these insns from being moved to
3799 an earlier spot in the epilogue, or into the main instruction stream.
3801 This is necessary as we must not cut the stack back before all the
3802 restores are finished. */
3803 emit_insn (gen_blockage ());
3805 /* Reset stack pointer (and possibly frame pointer). The stack
3806 pointer is initially set to fp + 64 to avoid a race condition. */
3807 if (frame_pointer_needed)
3809 rtx delta = GEN_INT (-64);
3811 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
3812 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3814 /* If we were deferring a callee register restore, do it now. */
3815 else if (merge_sp_adjust_with_load)
3817 rtx delta = GEN_INT (-actual_fsize);
3818 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3820 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3822 else if (actual_fsize != 0)
3823 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3826 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3827 frame greater than 8k), do so now. */
3829 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3831 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3833 rtx sa = EH_RETURN_STACKADJ_RTX;
3835 emit_insn (gen_blockage ());
3836 emit_insn (TARGET_64BIT
3837 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3838 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
3843 hppa_pic_save_rtx (void)
3845 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
3849 hppa_profile_hook (int label_no)
3851 rtx begin_label_rtx, call_insn;
3852 char begin_label_name[16];
3854 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3856 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3859 emit_move_insn (arg_pointer_rtx,
3860 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3863 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3865 #ifndef NO_PROFILE_COUNTERS
3867 rtx count_label_rtx, addr, r24;
3868 char count_label_name[16];
3870 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3871 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
3873 addr = force_reg (Pmode, count_label_rtx);
3874 r24 = gen_rtx_REG (Pmode, 24);
3875 emit_move_insn (r24, addr);
3877 /* %r25 is set from within the output pattern. */
3879 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3880 GEN_INT (TARGET_64BIT ? 24 : 12),
3883 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3886 /* %r25 is set from within the output pattern. */
3888 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3889 GEN_INT (TARGET_64BIT ? 16 : 8),
3893 /* Indicate the _mcount call cannot throw, nor will it execute a
3895 REG_NOTES (call_insn)
3896 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3900 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3902 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3904 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3908 /* Fetch the return address for the frame COUNT steps up from
3909 the current frame, after the prologue. FRAMEADDR is the
3910 frame pointer of the COUNT frame.
3912 We want to ignore any export stub remnants here. To handle this,
3913 we examine the code at the return address, and if it is an export
3914 stub, we return a memory rtx for the stub return address stored
3917 The value returned is used in two different ways:
3919 1. To find a function's caller.
3921 2. To change the return address for a function.
3923 This function handles most instances of case 1; however, it will
3924 fail if there are two levels of stubs to execute on the return
3925 path. The only way I believe that can happen is if the return value
3926 needs a parameter relocation, which never happens for C code.
3928 This function handles most instances of case 2; however, it will
3929 fail if we did not originally have stub code on the return path
3930 but will need stub code on the new return path. This can happen if
3931 the caller & callee are both in the main program, but the new
3932 return location is in a shared library. */
3935 return_addr_rtx (int count, rtx frameaddr)
3945 rp = get_hard_reg_initial_val (Pmode, 2);
3947 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3950 saved_rp = gen_reg_rtx (Pmode);
3951 emit_move_insn (saved_rp, rp);
3953 /* Get pointer to the instruction stream. We have to mask out the
3954 privilege level from the two low order bits of the return address
3955 pointer here so that ins will point to the start of the first
3956 instruction that would have been executed if we returned. */
3957 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3958 label = gen_label_rtx ();
3960 /* Check the instruction stream at the normal return address for the
3963 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3964 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3965 0x00011820 | stub+16: mtsp r1,sr0
3966 0xe0400002 | stub+20: be,n 0(sr0,rp)
3968 If it is an export stub, than our return address is really in
3971 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3972 NULL_RTX, SImode, 1);
3973 emit_jump_insn (gen_bne (label));
3975 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3976 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3977 emit_jump_insn (gen_bne (label));
3979 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3980 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
3981 emit_jump_insn (gen_bne (label));
3983 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3984 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
3986 /* If there is no export stub then just use the value saved from
3987 the return pointer register. */
3989 emit_jump_insn (gen_bne (label));
3991 /* Here we know that our return address points to an export
3992 stub. We don't want to return the address of the export stub,
3993 but rather the return address of the export stub. That return
3994 address is stored at -24[frameaddr]. */
3996 emit_move_insn (saved_rp,
3998 memory_address (Pmode,
3999 plus_constant (frameaddr,
4006 /* This is only valid once reload has completed because it depends on
4007 knowing exactly how much (if any) frame there is and...
4009 It's only valid if there is no frame marker to de-allocate and...
4011 It's only valid if %r2 hasn't been saved into the caller's frame
4012 (we're not profiling and %r2 isn't live anywhere). */
4014 hppa_can_use_return_insn_p (void)
4016 return (reload_completed
4017 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4018 && ! regs_ever_live[2]
4019 && ! frame_pointer_needed);
4023 emit_bcond_fp (enum rtx_code code, rtx operand0)
4025 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4026 gen_rtx_IF_THEN_ELSE (VOIDmode,
4027 gen_rtx_fmt_ee (code,
4029 gen_rtx_REG (CCFPmode, 0),
4031 gen_rtx_LABEL_REF (VOIDmode, operand0),
4037 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4039 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4040 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4043 /* Adjust the cost of a scheduling dependency. Return the new cost of
4044 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4047 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4049 enum attr_type attr_type;
4051 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4052 true dependencies as they are described with bypasses now. */
4053 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4056 if (! recog_memoized (insn))
4059 attr_type = get_attr_type (insn);
4061 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4063 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4066 if (attr_type == TYPE_FPLOAD)
4068 rtx pat = PATTERN (insn);
4069 rtx dep_pat = PATTERN (dep_insn);
4070 if (GET_CODE (pat) == PARALLEL)
4072 /* This happens for the fldXs,mb patterns. */
4073 pat = XVECEXP (pat, 0, 0);
4075 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4076 /* If this happens, we have to extend this to schedule
4077 optimally. Return 0 for now. */
4080 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4082 if (! recog_memoized (dep_insn))
4084 switch (get_attr_type (dep_insn))
4091 case TYPE_FPSQRTSGL:
4092 case TYPE_FPSQRTDBL:
4093 /* A fpload can't be issued until one cycle before a
4094 preceding arithmetic operation has finished if
4095 the target of the fpload is any of the sources
4096 (or destination) of the arithmetic operation. */
4097 return insn_default_latency (dep_insn) - 1;
4104 else if (attr_type == TYPE_FPALU)
4106 rtx pat = PATTERN (insn);
4107 rtx dep_pat = PATTERN (dep_insn);
4108 if (GET_CODE (pat) == PARALLEL)
4110 /* This happens for the fldXs,mb patterns. */
4111 pat = XVECEXP (pat, 0, 0);
4113 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4114 /* If this happens, we have to extend this to schedule
4115 optimally. Return 0 for now. */
4118 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4120 if (! recog_memoized (dep_insn))
4122 switch (get_attr_type (dep_insn))
4126 case TYPE_FPSQRTSGL:
4127 case TYPE_FPSQRTDBL:
4128 /* An ALU flop can't be issued until two cycles before a
4129 preceding divide or sqrt operation has finished if
4130 the target of the ALU flop is any of the sources
4131 (or destination) of the divide or sqrt operation. */
4132 return insn_default_latency (dep_insn) - 2;
4140 /* For other anti dependencies, the cost is 0. */
4143 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4145 /* Output dependency; DEP_INSN writes a register that INSN writes some
4147 if (attr_type == TYPE_FPLOAD)
4149 rtx pat = PATTERN (insn);
4150 rtx dep_pat = PATTERN (dep_insn);
4151 if (GET_CODE (pat) == PARALLEL)
4153 /* This happens for the fldXs,mb patterns. */
4154 pat = XVECEXP (pat, 0, 0);
4156 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4157 /* If this happens, we have to extend this to schedule
4158 optimally. Return 0 for now. */
4161 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4163 if (! recog_memoized (dep_insn))
4165 switch (get_attr_type (dep_insn))
4172 case TYPE_FPSQRTSGL:
4173 case TYPE_FPSQRTDBL:
4174 /* A fpload can't be issued until one cycle before a
4175 preceding arithmetic operation has finished if
4176 the target of the fpload is the destination of the
4177 arithmetic operation.
4179 Exception: For PA7100LC, PA7200 and PA7300, the cost
4180 is 3 cycles, unless they bundle together. We also
4181 pay the penalty if the second insn is a fpload. */
4182 return insn_default_latency (dep_insn) - 1;
4189 else if (attr_type == TYPE_FPALU)
4191 rtx pat = PATTERN (insn);
4192 rtx dep_pat = PATTERN (dep_insn);
4193 if (GET_CODE (pat) == PARALLEL)
4195 /* This happens for the fldXs,mb patterns. */
4196 pat = XVECEXP (pat, 0, 0);
4198 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4199 /* If this happens, we have to extend this to schedule
4200 optimally. Return 0 for now. */
4203 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4205 if (! recog_memoized (dep_insn))
4207 switch (get_attr_type (dep_insn))
4211 case TYPE_FPSQRTSGL:
4212 case TYPE_FPSQRTDBL:
4213 /* An ALU flop can't be issued until two cycles before a
4214 preceding divide or sqrt operation has finished if
4215 the target of the ALU flop is also the target of
4216 the divide or sqrt operation. */
4217 return insn_default_latency (dep_insn) - 2;
4225 /* For other output dependencies, the cost is 0. */
4232 /* Adjust scheduling priorities. We use this to try and keep addil
4233 and the next use of %r1 close together. */
4235 pa_adjust_priority (rtx insn, int priority)
4237 rtx set = single_set (insn);
4241 src = SET_SRC (set);
4242 dest = SET_DEST (set);
4243 if (GET_CODE (src) == LO_SUM
4244 && symbolic_operand (XEXP (src, 1), VOIDmode)
4245 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4248 else if (GET_CODE (src) == MEM
4249 && GET_CODE (XEXP (src, 0)) == LO_SUM
4250 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4251 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4254 else if (GET_CODE (dest) == MEM
4255 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4256 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4257 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4263 /* The 700 can only issue a single insn at a time.
4264 The 7XXX processors can issue two insns at a time.
4265 The 8000 can issue 4 insns at a time. */
4267 pa_issue_rate (void)
4271 case PROCESSOR_700: return 1;
4272 case PROCESSOR_7100: return 2;
4273 case PROCESSOR_7100LC: return 2;
4274 case PROCESSOR_7200: return 2;
4275 case PROCESSOR_7300: return 2;
4276 case PROCESSOR_8000: return 4;
4285 /* Return any length adjustment needed by INSN which already has its length
4286 computed as LENGTH. Return zero if no adjustment is necessary.
4288 For the PA: function calls, millicode calls, and backwards short
4289 conditional branches with unfilled delay slots need an adjustment by +1
4290 (to account for the NOP which will be inserted into the instruction stream).
4292 Also compute the length of an inline block move here as it is too
4293 complicated to express as a length attribute in pa.md. */
4295 pa_adjust_insn_length (rtx insn, int length)
4297 rtx pat = PATTERN (insn);
4299 /* Jumps inside switch tables which have unfilled delay slots need
4301 if (GET_CODE (insn) == JUMP_INSN
4302 && simplejump_p (insn)
4303 && GET_MODE (insn) == SImode)
4305 /* Millicode insn with an unfilled delay slot. */
4306 else if (GET_CODE (insn) == INSN
4307 && GET_CODE (pat) != SEQUENCE
4308 && GET_CODE (pat) != USE
4309 && GET_CODE (pat) != CLOBBER
4310 && get_attr_type (insn) == TYPE_MILLI)
4312 /* Block move pattern. */
4313 else if (GET_CODE (insn) == INSN
4314 && GET_CODE (pat) == PARALLEL
4315 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4316 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4317 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4318 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4319 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4320 return compute_movstrsi_length (insn) - 4;
4321 /* Conditional branch with an unfilled delay slot. */
4322 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4324 /* Adjust a short backwards conditional with an unfilled delay slot. */
4325 if (GET_CODE (pat) == SET
4327 && ! forward_branch_p (insn))
4329 else if (GET_CODE (pat) == PARALLEL
4330 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4333 /* Adjust dbra insn with short backwards conditional branch with
4334 unfilled delay slot -- only for case where counter is in a
4335 general register register. */
4336 else if (GET_CODE (pat) == PARALLEL
4337 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4338 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4339 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4341 && ! forward_branch_p (insn))
4349 /* Print operand X (an rtx) in assembler syntax to file FILE.
4350 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4351 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4354 print_operand (FILE *file, rtx x, int code)
4359 /* Output a 'nop' if there's nothing for the delay slot. */
4360 if (dbr_sequence_length () == 0)
4361 fputs ("\n\tnop", file);
4364 /* Output a nullification completer if there's nothing for the */
4365 /* delay slot or nullification is requested. */
4366 if (dbr_sequence_length () == 0 ||
4368 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4372 /* Print out the second register name of a register pair.
4373 I.e., R (6) => 7. */
4374 fputs (reg_names[REGNO (x) + 1], file);
4377 /* A register or zero. */
4379 || (x == CONST0_RTX (DFmode))
4380 || (x == CONST0_RTX (SFmode)))
4382 fputs ("%r0", file);
4388 /* A register or zero (floating point). */
4390 || (x == CONST0_RTX (DFmode))
4391 || (x == CONST0_RTX (SFmode)))
4393 fputs ("%fr0", file);
4402 xoperands[0] = XEXP (XEXP (x, 0), 0);
4403 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4404 output_global_address (file, xoperands[1], 0);
4405 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4409 case 'C': /* Plain (C)ondition */
4411 switch (GET_CODE (x))
4414 fputs ("=", file); break;
4416 fputs ("<>", file); break;
4418 fputs (">", file); break;
4420 fputs (">=", file); break;
4422 fputs (">>=", file); break;
4424 fputs (">>", file); break;
4426 fputs ("<", file); break;
4428 fputs ("<=", file); break;
4430 fputs ("<<=", file); break;
4432 fputs ("<<", file); break;
4437 case 'N': /* Condition, (N)egated */
4438 switch (GET_CODE (x))
4441 fputs ("<>", file); break;
4443 fputs ("=", file); break;
4445 fputs ("<=", file); break;
4447 fputs ("<", file); break;
4449 fputs ("<<", file); break;
4451 fputs ("<<=", file); break;
4453 fputs (">=", file); break;
4455 fputs (">", file); break;
4457 fputs (">>", file); break;
4459 fputs (">>=", file); break;
4464 /* For floating point comparisons. Note that the output
4465 predicates are the complement of the desired mode. */
4467 switch (GET_CODE (x))
4470 fputs ("!=", file); break;
4472 fputs ("=", file); break;
4474 fputs ("!>", file); break;
4476 fputs ("!>=", file); break;
4478 fputs ("!<", file); break;
4480 fputs ("!<=", file); break;
4482 fputs ("!<>", file); break;
4484 fputs (">", file); break;
4486 fputs (">=", file); break;
4488 fputs ("<", file); break;
4490 fputs ("<=", file); break;
4492 fputs ("<>", file); break;
4494 fputs ("<=>", file); break;
4496 fputs ("!<=>", file); break;
4501 case 'S': /* Condition, operands are (S)wapped. */
4502 switch (GET_CODE (x))
4505 fputs ("=", file); break;
4507 fputs ("<>", file); break;
4509 fputs ("<", file); break;
4511 fputs ("<=", file); break;
4513 fputs ("<<=", file); break;
4515 fputs ("<<", file); break;
4517 fputs (">", file); break;
4519 fputs (">=", file); break;
4521 fputs (">>=", file); break;
4523 fputs (">>", file); break;
4528 case 'B': /* Condition, (B)oth swapped and negate. */
4529 switch (GET_CODE (x))
4532 fputs ("<>", file); break;
4534 fputs ("=", file); break;
4536 fputs (">=", file); break;
4538 fputs (">", file); break;
4540 fputs (">>", file); break;
4542 fputs (">>=", file); break;
4544 fputs ("<=", file); break;
4546 fputs ("<", file); break;
4548 fputs ("<<", file); break;
4550 fputs ("<<=", file); break;
4556 if (GET_CODE (x) == CONST_INT)
4558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4563 if (GET_CODE (x) == CONST_INT)
4565 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4570 if (GET_CODE (x) == CONST_INT)
4572 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4577 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4579 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4584 if (GET_CODE (x) == CONST_INT)
4586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4591 if (GET_CODE (x) == CONST_INT)
4593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4598 if (GET_CODE (x) == CONST_INT)
4603 switch (GET_CODE (XEXP (x, 0)))
4607 if (ASSEMBLER_DIALECT == 0)
4608 fputs ("s,mb", file);
4610 fputs (",mb", file);
4614 if (ASSEMBLER_DIALECT == 0)
4615 fputs ("s,ma", file);
4617 fputs (",ma", file);
4620 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4621 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4623 if (ASSEMBLER_DIALECT == 0)
4624 fputs ("x,s", file);
4628 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4632 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4638 output_global_address (file, x, 0);
4641 output_global_address (file, x, 1);
4643 case 0: /* Don't do anything special */
4648 compute_zdepwi_operands (INTVAL (x), op);
4649 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4655 compute_zdepdi_operands (INTVAL (x), op);
4656 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4660 /* We can get here from a .vtable_inherit due to our
4661 CONSTANT_ADDRESS_P rejecting perfectly good constant
4667 if (GET_CODE (x) == REG)
4669 fputs (reg_names [REGNO (x)], file);
4670 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4676 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4677 && (REGNO (x) & 1) == 0)
4680 else if (GET_CODE (x) == MEM)
4682 int size = GET_MODE_SIZE (GET_MODE (x));
4683 rtx base = NULL_RTX;
4684 switch (GET_CODE (XEXP (x, 0)))
4688 base = XEXP (XEXP (x, 0), 0);
4689 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4693 base = XEXP (XEXP (x, 0), 0);
4694 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4697 if (GET_CODE (XEXP (x, 0)) == PLUS
4698 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4699 fprintf (file, "%s(%s)",
4700 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4701 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4702 else if (GET_CODE (XEXP (x, 0)) == PLUS
4703 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4704 fprintf (file, "%s(%s)",
4705 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4706 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4708 output_address (XEXP (x, 0));
4713 output_addr_const (file, x);
4716 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4719 output_global_address (FILE *file, rtx x, int round_constant)
4722 /* Imagine (high (const (plus ...))). */
4723 if (GET_CODE (x) == HIGH)
4726 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4727 assemble_name (file, XSTR (x, 0));
4728 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4730 assemble_name (file, XSTR (x, 0));
4731 fputs ("-$global$", file);
4733 else if (GET_CODE (x) == CONST)
4735 const char *sep = "";
4736 int offset = 0; /* assembler wants -$global$ at end */
4737 rtx base = NULL_RTX;
4739 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4741 base = XEXP (XEXP (x, 0), 0);
4742 output_addr_const (file, base);
4744 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4745 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4748 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4750 base = XEXP (XEXP (x, 0), 1);
4751 output_addr_const (file, base);
4753 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4754 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4757 /* How bogus. The compiler is apparently responsible for
4758 rounding the constant if it uses an LR field selector.
4760 The linker and/or assembler seem a better place since
4761 they have to do this kind of thing already.
4763 If we fail to do this, HP's optimizing linker may eliminate
4764 an addil, but not update the ldw/stw/ldo instruction that
4765 uses the result of the addil. */
4767 offset = ((offset + 0x1000) & ~0x1fff);
4769 if (GET_CODE (XEXP (x, 0)) == PLUS)
4779 else if (GET_CODE (XEXP (x, 0)) == MINUS
4780 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4784 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4785 fputs ("-$global$", file);
4787 fprintf (file, "%s%d", sep, offset);
4790 output_addr_const (file, x);
4793 /* Output boilerplate text to appear at the beginning of the file.
4794 There are several possible versions. */
4795 #define aputs(x) fputs(x, asm_out_file)
4797 pa_file_start_level (void)
4800 aputs ("\t.LEVEL 2.0w\n");
4801 else if (TARGET_PA_20)
4802 aputs ("\t.LEVEL 2.0\n");
4803 else if (TARGET_PA_11)
4804 aputs ("\t.LEVEL 1.1\n");
4806 aputs ("\t.LEVEL 1.0\n");
4810 pa_file_start_space (int sortspace)
4812 aputs ("\t.SPACE $PRIVATE$");
4815 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
4816 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
4817 "\n\t.SPACE $TEXT$");
4820 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
4821 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
4825 pa_file_start_file (int want_version)
4827 if (write_symbols != NO_DEBUG)
4829 output_file_directive (asm_out_file, main_input_filename);
4831 aputs ("\t.version\t\"01.01\"\n");
4836 pa_file_start_mcount (const char *aswhat)
4839 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
4843 pa_elf_file_start (void)
4845 pa_file_start_level ();
4846 pa_file_start_mcount ("ENTRY");
4847 pa_file_start_file (0);
4851 pa_som_file_start (void)
4853 pa_file_start_level ();
4854 pa_file_start_space (0);
4855 aputs ("\t.IMPORT $global$,DATA\n"
4856 "\t.IMPORT $$dyncall,MILLICODE\n");
4857 pa_file_start_mcount ("CODE");
4858 pa_file_start_file (0);
4862 pa_linux_file_start (void)
4864 pa_file_start_file (1);
4865 pa_file_start_level ();
4866 pa_file_start_mcount ("CODE");
4870 pa_hpux64_gas_file_start (void)
4872 pa_file_start_level ();
4873 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
4875 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
4877 pa_file_start_file (1);
4881 pa_hpux64_hpas_file_start (void)
4883 pa_file_start_level ();
4884 pa_file_start_space (1);
4885 pa_file_start_mcount ("CODE");
4886 pa_file_start_file (0);
4890 static struct deferred_plabel *
4891 get_plabel (const char *fname)
4895 /* See if we have already put this function on the list of deferred
4896 plabels. This list is generally small, so a liner search is not
4897 too ugly. If it proves too slow replace it with something faster. */
4898 for (i = 0; i < n_deferred_plabels; i++)
4899 if (strcmp (fname, deferred_plabels[i].name) == 0)
4902 /* If the deferred plabel list is empty, or this entry was not found
4903 on the list, create a new entry on the list. */
4904 if (deferred_plabels == NULL || i == n_deferred_plabels)
4906 const char *real_name;
4908 if (deferred_plabels == 0)
4909 deferred_plabels = (struct deferred_plabel *)
4910 ggc_alloc (sizeof (struct deferred_plabel));
4912 deferred_plabels = (struct deferred_plabel *)
4913 ggc_realloc (deferred_plabels,
4914 ((n_deferred_plabels + 1)
4915 * sizeof (struct deferred_plabel)));
4917 i = n_deferred_plabels++;
4918 deferred_plabels[i].internal_label = gen_label_rtx ();
4919 deferred_plabels[i].name = ggc_strdup (fname);
4921 /* Gross. We have just implicitly taken the address of this function,
4923 real_name = (*targetm.strip_name_encoding) (fname);
4924 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
4927 return &deferred_plabels[i];
4931 output_deferred_plabels (void)
4934 /* If we have deferred plabels, then we need to switch into the data
4935 section and align it to a 4 byte boundary before we output the
4936 deferred plabels. */
4937 if (n_deferred_plabels)
4940 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
4943 /* Now output the deferred plabels. */
4944 for (i = 0; i < n_deferred_plabels; i++)
4946 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4947 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4948 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4949 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
4953 /* HP's millicode routines mean something special to the assembler.
4954 Keep track of which ones we have used. */
4956 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
4957 static void import_milli (enum millicodes);
4958 static char imported[(int) end1000];
4959 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
4960 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4961 #define MILLI_START 10
4964 import_milli (enum millicodes code)
4966 char str[sizeof (import_string)];
4968 if (!imported[(int) code])
4970 imported[(int) code] = 1;
4971 strcpy (str, import_string);
4972 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4973 output_asm_insn (str, 0);
4977 /* The register constraints have put the operands and return value in
4978 the proper registers. */
4981 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
4983 import_milli (mulI);
4984 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4987 /* Emit the rtl for doing a division by a constant. */
4989 /* Do magic division millicodes exist for this value? */
4990 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4993 /* We'll use an array to keep track of the magic millicodes and
4994 whether or not we've used them already. [n][0] is signed, [n][1] is
4997 static int div_milli[16][2];
5000 div_operand (rtx op, enum machine_mode mode)
5002 return (mode == SImode
5003 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5004 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5005 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5009 emit_hpdiv_const (rtx *operands, int unsignedp)
5011 if (GET_CODE (operands[2]) == CONST_INT
5012 && INTVAL (operands[2]) > 0
5013 && INTVAL (operands[2]) < 16
5014 && magic_milli[INTVAL (operands[2])])
5016 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5018 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5021 (PARALLEL, VOIDmode,
5022 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5023 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5025 gen_rtx_REG (SImode, 26),
5027 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5028 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5029 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5030 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5031 gen_rtx_CLOBBER (VOIDmode, ret))));
5032 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5039 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5043 /* If the divisor is a constant, try to use one of the special
5045 if (GET_CODE (operands[0]) == CONST_INT)
5047 static char buf[100];
5048 divisor = INTVAL (operands[0]);
5049 if (!div_milli[divisor][unsignedp])
5051 div_milli[divisor][unsignedp] = 1;
5053 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5055 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5059 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5060 INTVAL (operands[0]));
5061 return output_millicode_call (insn,
5062 gen_rtx_SYMBOL_REF (SImode, buf));
5066 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5067 INTVAL (operands[0]));
5068 return output_millicode_call (insn,
5069 gen_rtx_SYMBOL_REF (SImode, buf));
5072 /* Divisor isn't a special constant. */
5077 import_milli (divU);
5078 return output_millicode_call (insn,
5079 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5083 import_milli (divI);
5084 return output_millicode_call (insn,
5085 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5090 /* Output a $$rem millicode to do mod. */
5093 output_mod_insn (int unsignedp, rtx insn)
5097 import_milli (remU);
5098 return output_millicode_call (insn,
5099 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5103 import_milli (remI);
5104 return output_millicode_call (insn,
5105 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5110 output_arg_descriptor (rtx call_insn)
5112 const char *arg_regs[4];
5113 enum machine_mode arg_mode;
5115 int i, output_flag = 0;
5118 /* We neither need nor want argument location descriptors for the
5119 64bit runtime environment or the ELF32 environment. */
5120 if (TARGET_64BIT || TARGET_ELF32)
5123 for (i = 0; i < 4; i++)
5126 /* Specify explicitly that no argument relocations should take place
5127 if using the portable runtime calling conventions. */
5128 if (TARGET_PORTABLE_RUNTIME)
5130 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5135 if (GET_CODE (call_insn) != CALL_INSN)
5137 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5139 rtx use = XEXP (link, 0);
5141 if (! (GET_CODE (use) == USE
5142 && GET_CODE (XEXP (use, 0)) == REG
5143 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5146 arg_mode = GET_MODE (XEXP (use, 0));
5147 regno = REGNO (XEXP (use, 0));
5148 if (regno >= 23 && regno <= 26)
5150 arg_regs[26 - regno] = "GR";
5151 if (arg_mode == DImode)
5152 arg_regs[25 - regno] = "GR";
5154 else if (regno >= 32 && regno <= 39)
5156 if (arg_mode == SFmode)
5157 arg_regs[(regno - 32) / 2] = "FR";
5160 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5161 arg_regs[(regno - 34) / 2] = "FR";
5162 arg_regs[(regno - 34) / 2 + 1] = "FU";
5164 arg_regs[(regno - 34) / 2] = "FU";
5165 arg_regs[(regno - 34) / 2 + 1] = "FR";
5170 fputs ("\t.CALL ", asm_out_file);
5171 for (i = 0; i < 4; i++)
5176 fputc (',', asm_out_file);
5177 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5180 fputc ('\n', asm_out_file);
5183 /* Return the class of any secondary reload register that is needed to
5184 move IN into a register in class CLASS using mode MODE.
5186 Profiling has showed this routine and its descendants account for
5187 a significant amount of compile time (~7%). So it has been
5188 optimized to reduce redundant computations and eliminate useless
5191 It might be worthwhile to try and make this a leaf function too. */
5194 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5196 int regno, is_symbolic;
5198 /* Trying to load a constant into a FP register during PIC code
5199 generation will require %r1 as a scratch register. */
5201 && GET_MODE_CLASS (mode) == MODE_INT
5202 && FP_REG_CLASS_P (class)
5203 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5206 /* Profiling showed the PA port spends about 1.3% of its compilation
5207 time in true_regnum from calls inside secondary_reload_class. */
5209 if (GET_CODE (in) == REG)
5212 if (regno >= FIRST_PSEUDO_REGISTER)
5213 regno = true_regnum (in);
5215 else if (GET_CODE (in) == SUBREG)
5216 regno = true_regnum (in);
5220 /* If we have something like (mem (mem (...)), we can safely assume the
5221 inner MEM will end up in a general register after reloading, so there's
5222 no need for a secondary reload. */
5223 if (GET_CODE (in) == MEM
5224 && GET_CODE (XEXP (in, 0)) == MEM)
5227 /* Handle out of range displacement for integer mode loads/stores of
5229 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5230 && GET_MODE_CLASS (mode) == MODE_INT
5231 && FP_REG_CLASS_P (class))
5232 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5233 return GENERAL_REGS;
5235 /* A SAR<->FP register copy requires a secondary register (GPR) as
5236 well as secondary memory. */
5237 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5238 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5239 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5240 return GENERAL_REGS;
5242 if (GET_CODE (in) == HIGH)
5245 /* Profiling has showed GCC spends about 2.6% of its compilation
5246 time in symbolic_operand from calls inside secondary_reload_class.
5248 We use an inline copy and only compute its return value once to avoid
5250 switch (GET_CODE (in))
5260 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5261 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5262 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5272 && read_only_operand (in, VOIDmode))
5275 if (class != R1_REGS && is_symbolic)
5282 function_arg_padding (enum machine_mode mode, tree type)
5285 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5287 /* Return none if justification is not required. */
5289 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5290 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5293 /* The directions set here are ignored when a BLKmode argument larger
5294 than a word is placed in a register. Different code is used for
5295 the stack and registers. This makes it difficult to have a
5296 consistent data representation for both the stack and registers.
5297 For both runtimes, the justification and padding for arguments on
5298 the stack and in registers should be identical. */
5300 /* The 64-bit runtime specifies left justification for aggregates. */
5303 /* The 32-bit runtime architecture specifies right justification.
5304 When the argument is passed on the stack, the argument is padded
5305 with garbage on the left. The HP compiler pads with zeros. */
5309 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5316 /* Do what is necessary for `va_start'. We look at the current function
5317 to determine if stdargs or varargs is used and fill in an initial
5318 va_list. A pointer to this constructor is returned. */
5321 hppa_builtin_saveregs (void)
5324 tree fntype = TREE_TYPE (current_function_decl);
5325 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5326 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5327 != void_type_node)))
5328 ? UNITS_PER_WORD : 0);
5331 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5333 offset = current_function_arg_offset_rtx;
5339 /* Adjust for varargs/stdarg differences. */
5341 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5343 offset = current_function_arg_offset_rtx;
5345 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5346 from the incoming arg pointer and growing to larger addresses. */
5347 for (i = 26, off = -64; i >= 19; i--, off += 8)
5348 emit_move_insn (gen_rtx_MEM (word_mode,
5349 plus_constant (arg_pointer_rtx, off)),
5350 gen_rtx_REG (word_mode, i));
5352 /* The incoming args pointer points just beyond the flushback area;
5353 normally this is not a serious concern. However, when we are doing
5354 varargs/stdargs we want to make the arg pointer point to the start
5355 of the incoming argument area. */
5356 emit_move_insn (virtual_incoming_args_rtx,
5357 plus_constant (arg_pointer_rtx, -64));
5359 /* Now return a pointer to the first anonymous argument. */
5360 return copy_to_reg (expand_binop (Pmode, add_optab,
5361 virtual_incoming_args_rtx,
5362 offset, 0, 0, OPTAB_LIB_WIDEN));
5365 /* Store general registers on the stack. */
5366 dest = gen_rtx_MEM (BLKmode,
5367 plus_constant (current_function_internal_arg_pointer,
5369 set_mem_alias_set (dest, get_varargs_alias_set ());
5370 set_mem_align (dest, BITS_PER_WORD);
5371 move_block_from_reg (23, dest, 4);
5373 /* move_block_from_reg will emit code to store the argument registers
5374 individually as scalar stores.
5376 However, other insns may later load from the same addresses for
5377 a structure load (passing a struct to a varargs routine).
5379 The alias code assumes that such aliasing can never happen, so we
5380 have to keep memory referencing insns from moving up beyond the
5381 last argument register store. So we emit a blockage insn here. */
5382 emit_insn (gen_blockage ());
5384 return copy_to_reg (expand_binop (Pmode, add_optab,
5385 current_function_internal_arg_pointer,
5386 offset, 0, 0, OPTAB_LIB_WIDEN));
5390 hppa_va_start (tree valist, rtx nextarg)
5392 nextarg = expand_builtin_saveregs ();
5393 std_expand_builtin_va_start (valist, nextarg);
5397 hppa_va_arg (tree valist, tree type)
5399 HOST_WIDE_INT size = int_size_in_bytes (type);
5405 /* Every argument in PA64 is supposed to be passed by value
5406 (including large structs). However, as a GCC extension, we
5407 pass zero and variable sized arguments by reference. Empty
5408 structures are a GCC extension not supported by the HP
5409 compilers. Thus, passing them by reference isn't likely
5410 to conflict with the ABI. For variable sized arguments,
5411 GCC doesn't have the infrastructure to allocate these to
5414 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5416 if (size > UNITS_PER_WORD)
5418 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5419 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5420 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5421 build_int_2 (-2 * UNITS_PER_WORD, -1));
5422 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5423 TREE_SIDE_EFFECTS (t) = 1;
5424 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5428 return std_expand_builtin_va_arg (valist, type);
5431 ptr = build_pointer_type (type);
5433 /* Args grow upward. */
5434 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5435 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5436 TREE_SIDE_EFFECTS (t) = 1;
5438 pptr = build_pointer_type (ptr);
5439 t = build1 (NOP_EXPR, pptr, t);
5440 TREE_SIDE_EFFECTS (t) = 1;
5442 t = build1 (INDIRECT_REF, ptr, t);
5443 TREE_SIDE_EFFECTS (t) = 1;
5446 else /* !TARGET_64BIT */
5448 ptr = build_pointer_type (type);
5450 /* "Large" and variable sized types are passed by reference. */
5451 if (size > 8 || size <= 0)
5453 /* Args grow downward. */
5454 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5455 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5456 TREE_SIDE_EFFECTS (t) = 1;
5458 pptr = build_pointer_type (ptr);
5459 t = build1 (NOP_EXPR, pptr, t);
5460 TREE_SIDE_EFFECTS (t) = 1;
5462 t = build1 (INDIRECT_REF, ptr, t);
5463 TREE_SIDE_EFFECTS (t) = 1;
5467 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5468 build_int_2 (-size, -1));
5470 /* Copied from va-pa.h, but we probably don't need to align to
5471 word size, since we generate and preserve that invariant. */
5472 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5473 build_int_2 ((size > 4 ? -8 : -4), -1));
5475 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5476 TREE_SIDE_EFFECTS (t) = 1;
5478 ofs = (8 - size) % 4;
5481 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
5482 build_int_2 (ofs, 0));
5483 TREE_SIDE_EFFECTS (t) = 1;
5486 t = build1 (NOP_EXPR, ptr, t);
5487 TREE_SIDE_EFFECTS (t) = 1;
5492 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5497 /* This routine handles all the normal conditional branch sequences we
5498 might need to generate. It handles compare immediate vs compare
5499 register, nullification of delay slots, varying length branches,
5500 negated branches, and all combinations of the above. It returns the
5501 output appropriate to emit the branch corresponding to all given
5505 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
5507 static char buf[100];
5511 /* A conditional branch to the following instruction (eg the delay slot)
5512 is asking for a disaster. This can happen when not optimizing and
5513 when jump optimization fails.
5515 While it is usually safe to emit nothing, this can fail if the
5516 preceding instruction is a nullified branch with an empty delay
5517 slot and the same branch target as this branch. We could check
5518 for this but jump optimization should eliminate nop jumps. It
5519 is always safe to emit a nop. */
5520 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5523 /* If this is a long branch with its delay slot unfilled, set `nullify'
5524 as it can nullify the delay slot and save a nop. */
5525 if (length == 8 && dbr_sequence_length () == 0)
5528 /* If this is a short forward conditional branch which did not get
5529 its delay slot filled, the delay slot can still be nullified. */
5530 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5531 nullify = forward_branch_p (insn);
5533 /* A forward branch over a single nullified insn can be done with a
5534 comclr instruction. This avoids a single cycle penalty due to
5535 mis-predicted branch if we fall through (branch not taken). */
5537 && next_real_insn (insn) != 0
5538 && get_attr_length (next_real_insn (insn)) == 4
5539 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5545 /* All short conditional branches except backwards with an unfilled
5549 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5551 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5552 if (GET_MODE (operands[1]) == DImode)
5555 strcat (buf, "%B3");
5557 strcat (buf, "%S3");
5559 strcat (buf, " %2,%r1,%%r0");
5561 strcat (buf, ",n %2,%r1,%0");
5563 strcat (buf, " %2,%r1,%0");
5566 /* All long conditionals. Note a short backward branch with an
5567 unfilled delay slot is treated just like a long backward branch
5568 with an unfilled delay slot. */
5570 /* Handle weird backwards branch with a filled delay slot
5571 with is nullified. */
5572 if (dbr_sequence_length () != 0
5573 && ! forward_branch_p (insn)
5576 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5577 if (GET_MODE (operands[1]) == DImode)
5580 strcat (buf, "%S3");
5582 strcat (buf, "%B3");
5583 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5585 /* Handle short backwards branch with an unfilled delay slot.
5586 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5587 taken and untaken branches. */
5588 else if (dbr_sequence_length () == 0
5589 && ! forward_branch_p (insn)
5590 && INSN_ADDRESSES_SET_P ()
5591 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5592 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5594 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5595 if (GET_MODE (operands[1]) == DImode)
5598 strcat (buf, "%B3 %2,%r1,%0%#");
5600 strcat (buf, "%S3 %2,%r1,%0%#");
5604 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5605 if (GET_MODE (operands[1]) == DImode)
5608 strcat (buf, "%S3");
5610 strcat (buf, "%B3");
5612 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5614 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5620 xoperands[0] = operands[0];
5621 xoperands[1] = operands[1];
5622 xoperands[2] = operands[2];
5623 xoperands[3] = operands[3];
5625 /* The reversed conditional branch must branch over one additional
5626 instruction if the delay slot is filled. If the delay slot
5627 is empty, the instruction after the reversed condition branch
5628 must be nullified. */
5629 nullify = dbr_sequence_length () == 0;
5630 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
5632 /* Create a reversed conditional branch which branches around
5633 the following insns. */
5634 if (GET_MODE (operands[1]) != DImode)
5640 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
5643 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
5649 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
5652 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
5661 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
5664 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
5670 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
5673 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
5677 output_asm_insn (buf, xoperands);
5678 return output_lbranch (operands[0], insn);
5686 /* This routine handles long unconditional branches that exceed the
5687 maximum range of a simple branch instruction. */
5690 output_lbranch (rtx dest, rtx insn)
5694 xoperands[0] = dest;
5696 /* First, free up the delay slot. */
5697 if (dbr_sequence_length () != 0)
5699 /* We can't handle a jump in the delay slot. */
5700 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
5703 final_scan_insn (NEXT_INSN (insn), asm_out_file,
5706 /* Now delete the delay insn. */
5707 PUT_CODE (NEXT_INSN (insn), NOTE);
5708 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5709 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5712 /* Output an insn to save %r1. The runtime documentation doesn't
5713 specify whether the "Clean Up" slot in the callers frame can
5714 be clobbered by the callee. It isn't copied by HP's builtin
5715 alloca, so this suggests that it can be clobbered if necessary.
5716 The "Static Link" location is copied by HP builtin alloca, so
5717 we avoid using it. Using the cleanup slot might be a problem
5718 if we have to interoperate with languages that pass cleanup
5719 information. However, it should be possible to handle these
5720 situations with GCC's asm feature.
5722 The "Current RP" slot is reserved for the called procedure, so
5723 we try to use it when we don't have a frame of our own. It's
5724 rather unlikely that we won't have a frame when we need to emit
5727 Really the way to go long term is a register scavenger; goto
5728 the target of the jump and find a register which we can use
5729 as a scratch to hold the value in %r1. Then, we wouldn't have
5730 to free up the delay slot or clobber a slot that may be needed
5731 for other purposes. */
5734 if (actual_fsize == 0 && !regs_ever_live[2])
5735 /* Use the return pointer slot in the frame marker. */
5736 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
5738 /* Use the slot at -40 in the frame marker since HP builtin
5739 alloca doesn't copy it. */
5740 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
5744 if (actual_fsize == 0 && !regs_ever_live[2])
5745 /* Use the return pointer slot in the frame marker. */
5746 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
5748 /* Use the "Clean Up" slot in the frame marker. In GCC,
5749 the only other use of this location is for copying a
5750 floating point double argument from a floating-point
5751 register to two general registers. The copy is done
5752 as an "atomic" operation when outputting a call, so it
5753 won't interfere with our using the location here. */
5754 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
5757 if (TARGET_PORTABLE_RUNTIME)
5759 output_asm_insn ("ldil L'%0,%%r1", xoperands);
5760 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
5761 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5765 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5766 if (TARGET_SOM || !TARGET_GAS)
5768 xoperands[1] = gen_label_rtx ();
5769 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
5770 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5771 CODE_LABEL_NUMBER (xoperands[1]));
5772 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
5776 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5777 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
5779 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5782 /* Now output a very long branch to the original target. */
5783 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
5785 /* Now restore the value of %r1 in the delay slot. */
5788 if (actual_fsize == 0 && !regs_ever_live[2])
5789 return "ldd -16(%%r30),%%r1";
5791 return "ldd -40(%%r30),%%r1";
5795 if (actual_fsize == 0 && !regs_ever_live[2])
5796 return "ldw -20(%%r30),%%r1";
5798 return "ldw -12(%%r30),%%r1";
5802 /* This routine handles all the branch-on-bit conditional branch sequences we
5803 might need to generate. It handles nullification of delay slots,
5804 varying length branches, negated branches and all combinations of the
5805 above. it returns the appropriate output template to emit the branch. */
5808 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
5809 int negated, rtx insn, int which)
5811 static char buf[100];
5814 /* A conditional branch to the following instruction (eg the delay slot) is
5815 asking for a disaster. I do not think this can happen as this pattern
5816 is only used when optimizing; jump optimization should eliminate the
5817 jump. But be prepared just in case. */
5819 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5822 /* If this is a long branch with its delay slot unfilled, set `nullify'
5823 as it can nullify the delay slot and save a nop. */
5824 if (length == 8 && dbr_sequence_length () == 0)
5827 /* If this is a short forward conditional branch which did not get
5828 its delay slot filled, the delay slot can still be nullified. */
5829 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5830 nullify = forward_branch_p (insn);
5832 /* A forward branch over a single nullified insn can be done with a
5833 extrs instruction. This avoids a single cycle penalty due to
5834 mis-predicted branch if we fall through (branch not taken). */
5837 && next_real_insn (insn) != 0
5838 && get_attr_length (next_real_insn (insn)) == 4
5839 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5846 /* All short conditional branches except backwards with an unfilled
5850 strcpy (buf, "{extrs,|extrw,s,}");
5852 strcpy (buf, "bb,");
5853 if (useskip && GET_MODE (operands[0]) == DImode)
5854 strcpy (buf, "extrd,s,*");
5855 else if (GET_MODE (operands[0]) == DImode)
5856 strcpy (buf, "bb,*");
5857 if ((which == 0 && negated)
5858 || (which == 1 && ! negated))
5863 strcat (buf, " %0,%1,1,%%r0");
5864 else if (nullify && negated)
5865 strcat (buf, ",n %0,%1,%3");
5866 else if (nullify && ! negated)
5867 strcat (buf, ",n %0,%1,%2");
5868 else if (! nullify && negated)
5869 strcat (buf, "%0,%1,%3");
5870 else if (! nullify && ! negated)
5871 strcat (buf, " %0,%1,%2");
5874 /* All long conditionals. Note a short backward branch with an
5875 unfilled delay slot is treated just like a long backward branch
5876 with an unfilled delay slot. */
5878 /* Handle weird backwards branch with a filled delay slot
5879 with is nullified. */
5880 if (dbr_sequence_length () != 0
5881 && ! forward_branch_p (insn)
5884 strcpy (buf, "bb,");
5885 if (GET_MODE (operands[0]) == DImode)
5887 if ((which == 0 && negated)
5888 || (which == 1 && ! negated))
5893 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5895 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5897 /* Handle short backwards branch with an unfilled delay slot.
5898 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5899 taken and untaken branches. */
5900 else if (dbr_sequence_length () == 0
5901 && ! forward_branch_p (insn)
5902 && INSN_ADDRESSES_SET_P ()
5903 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5904 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5906 strcpy (buf, "bb,");
5907 if (GET_MODE (operands[0]) == DImode)
5909 if ((which == 0 && negated)
5910 || (which == 1 && ! negated))
5915 strcat (buf, " %0,%1,%3%#");
5917 strcat (buf, " %0,%1,%2%#");
5921 strcpy (buf, "{extrs,|extrw,s,}");
5922 if (GET_MODE (operands[0]) == DImode)
5923 strcpy (buf, "extrd,s,*");
5924 if ((which == 0 && negated)
5925 || (which == 1 && ! negated))
5929 if (nullify && negated)
5930 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5931 else if (nullify && ! negated)
5932 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5934 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5936 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5946 /* This routine handles all the branch-on-variable-bit conditional branch
5947 sequences we might need to generate. It handles nullification of delay
5948 slots, varying length branches, negated branches and all combinations
5949 of the above. it returns the appropriate output template to emit the
5953 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
5954 int negated, rtx insn, int which)
5956 static char buf[100];
5959 /* A conditional branch to the following instruction (eg the delay slot) is
5960 asking for a disaster. I do not think this can happen as this pattern
5961 is only used when optimizing; jump optimization should eliminate the
5962 jump. But be prepared just in case. */
5964 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5967 /* If this is a long branch with its delay slot unfilled, set `nullify'
5968 as it can nullify the delay slot and save a nop. */
5969 if (length == 8 && dbr_sequence_length () == 0)
5972 /* If this is a short forward conditional branch which did not get
5973 its delay slot filled, the delay slot can still be nullified. */
5974 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5975 nullify = forward_branch_p (insn);
5977 /* A forward branch over a single nullified insn can be done with a
5978 extrs instruction. This avoids a single cycle penalty due to
5979 mis-predicted branch if we fall through (branch not taken). */
5982 && next_real_insn (insn) != 0
5983 && get_attr_length (next_real_insn (insn)) == 4
5984 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5991 /* All short conditional branches except backwards with an unfilled
5995 strcpy (buf, "{vextrs,|extrw,s,}");
5997 strcpy (buf, "{bvb,|bb,}");
5998 if (useskip && GET_MODE (operands[0]) == DImode)
5999 strcpy (buf, "extrd,s,*");
6000 else if (GET_MODE (operands[0]) == DImode)
6001 strcpy (buf, "bb,*");
6002 if ((which == 0 && negated)
6003 || (which == 1 && ! negated))
6008 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6009 else if (nullify && negated)
6010 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6011 else if (nullify && ! negated)
6012 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6013 else if (! nullify && negated)
6014 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6015 else if (! nullify && ! negated)
6016 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6019 /* All long conditionals. Note a short backward branch with an
6020 unfilled delay slot is treated just like a long backward branch
6021 with an unfilled delay slot. */
6023 /* Handle weird backwards branch with a filled delay slot
6024 with is nullified. */
6025 if (dbr_sequence_length () != 0
6026 && ! forward_branch_p (insn)
6029 strcpy (buf, "{bvb,|bb,}");
6030 if (GET_MODE (operands[0]) == DImode)
6032 if ((which == 0 && negated)
6033 || (which == 1 && ! negated))
6038 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6040 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6042 /* Handle short backwards branch with an unfilled delay slot.
6043 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6044 taken and untaken branches. */
6045 else if (dbr_sequence_length () == 0
6046 && ! forward_branch_p (insn)
6047 && INSN_ADDRESSES_SET_P ()
6048 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6049 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6051 strcpy (buf, "{bvb,|bb,}");
6052 if (GET_MODE (operands[0]) == DImode)
6054 if ((which == 0 && negated)
6055 || (which == 1 && ! negated))
6060 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6062 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6066 strcpy (buf, "{vextrs,|extrw,s,}");
6067 if (GET_MODE (operands[0]) == DImode)
6068 strcpy (buf, "extrd,s,*");
6069 if ((which == 0 && negated)
6070 || (which == 1 && ! negated))
6074 if (nullify && negated)
6075 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6076 else if (nullify && ! negated)
6077 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6079 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6081 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6091 /* Return the output template for emitting a dbra type insn.
6093 Note it may perform some output operations on its own before
6094 returning the final output string. */
6096 output_dbra (rtx *operands, rtx insn, int which_alternative)
6099 /* A conditional branch to the following instruction (eg the delay slot) is
6100 asking for a disaster. Be prepared! */
6102 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6104 if (which_alternative == 0)
6105 return "ldo %1(%0),%0";
6106 else if (which_alternative == 1)
6108 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6109 output_asm_insn ("ldw -16(%%r30),%4", operands);
6110 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6111 return "{fldws|fldw} -16(%%r30),%0";
6115 output_asm_insn ("ldw %0,%4", operands);
6116 return "ldo %1(%4),%4\n\tstw %4,%0";
6120 if (which_alternative == 0)
6122 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6123 int length = get_attr_length (insn);
6125 /* If this is a long branch with its delay slot unfilled, set `nullify'
6126 as it can nullify the delay slot and save a nop. */
6127 if (length == 8 && dbr_sequence_length () == 0)
6130 /* If this is a short forward conditional branch which did not get
6131 its delay slot filled, the delay slot can still be nullified. */
6132 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6133 nullify = forward_branch_p (insn);
6135 /* Handle short versions first. */
6136 if (length == 4 && nullify)
6137 return "addib,%C2,n %1,%0,%3";
6138 else if (length == 4 && ! nullify)
6139 return "addib,%C2 %1,%0,%3";
6140 else if (length == 8)
6142 /* Handle weird backwards branch with a fulled delay slot
6143 which is nullified. */
6144 if (dbr_sequence_length () != 0
6145 && ! forward_branch_p (insn)
6147 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6148 /* Handle short backwards branch with an unfilled delay slot.
6149 Using a addb;nop rather than addi;bl saves 1 cycle for both
6150 taken and untaken branches. */
6151 else if (dbr_sequence_length () == 0
6152 && ! forward_branch_p (insn)
6153 && INSN_ADDRESSES_SET_P ()
6154 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6155 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6156 return "addib,%C2 %1,%0,%3%#";
6158 /* Handle normal cases. */
6160 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6162 return "addi,%N2 %1,%0,%0\n\tb %3";
6167 /* Deal with gross reload from FP register case. */
6168 else if (which_alternative == 1)
6170 /* Move loop counter from FP register to MEM then into a GR,
6171 increment the GR, store the GR into MEM, and finally reload
6172 the FP register from MEM from within the branch's delay slot. */
6173 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6175 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6176 if (get_attr_length (insn) == 24)
6177 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6179 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6181 /* Deal with gross reload from memory case. */
6184 /* Reload loop counter from memory, the store back to memory
6185 happens in the branch's delay slot. */
6186 output_asm_insn ("ldw %0,%4", operands);
6187 if (get_attr_length (insn) == 12)
6188 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6190 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6194 /* Return the output template for emitting a dbra type insn.
6196 Note it may perform some output operations on its own before
6197 returning the final output string. */
6199 output_movb (rtx *operands, rtx insn, int which_alternative,
6200 int reverse_comparison)
6203 /* A conditional branch to the following instruction (eg the delay slot) is
6204 asking for a disaster. Be prepared! */
6206 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6208 if (which_alternative == 0)
6209 return "copy %1,%0";
6210 else if (which_alternative == 1)
6212 output_asm_insn ("stw %1,-16(%%r30)", operands);
6213 return "{fldws|fldw} -16(%%r30),%0";
6215 else if (which_alternative == 2)
6221 /* Support the second variant. */
6222 if (reverse_comparison)
6223 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6225 if (which_alternative == 0)
6227 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6228 int length = get_attr_length (insn);
6230 /* If this is a long branch with its delay slot unfilled, set `nullify'
6231 as it can nullify the delay slot and save a nop. */
6232 if (length == 8 && dbr_sequence_length () == 0)
6235 /* If this is a short forward conditional branch which did not get
6236 its delay slot filled, the delay slot can still be nullified. */
6237 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6238 nullify = forward_branch_p (insn);
6240 /* Handle short versions first. */
6241 if (length == 4 && nullify)
6242 return "movb,%C2,n %1,%0,%3";
6243 else if (length == 4 && ! nullify)
6244 return "movb,%C2 %1,%0,%3";
6245 else if (length == 8)
6247 /* Handle weird backwards branch with a filled delay slot
6248 which is nullified. */
6249 if (dbr_sequence_length () != 0
6250 && ! forward_branch_p (insn)
6252 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6254 /* Handle short backwards branch with an unfilled delay slot.
6255 Using a movb;nop rather than or;bl saves 1 cycle for both
6256 taken and untaken branches. */
6257 else if (dbr_sequence_length () == 0
6258 && ! forward_branch_p (insn)
6259 && INSN_ADDRESSES_SET_P ()
6260 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6261 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6262 return "movb,%C2 %1,%0,%3%#";
6263 /* Handle normal cases. */
6265 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6267 return "or,%N2 %1,%%r0,%0\n\tb %3";
6272 /* Deal with gross reload from FP register case. */
6273 else if (which_alternative == 1)
6275 /* Move loop counter from FP register to MEM then into a GR,
6276 increment the GR, store the GR into MEM, and finally reload
6277 the FP register from MEM from within the branch's delay slot. */
6278 output_asm_insn ("stw %1,-16(%%r30)", operands);
6279 if (get_attr_length (insn) == 12)
6280 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6282 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6284 /* Deal with gross reload from memory case. */
6285 else if (which_alternative == 2)
6287 /* Reload loop counter from memory, the store back to memory
6288 happens in the branch's delay slot. */
6289 if (get_attr_length (insn) == 8)
6290 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6292 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6294 /* Handle SAR as a destination. */
6297 if (get_attr_length (insn) == 8)
6298 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6300 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6304 /* Copy any FP arguments in INSN into integer registers. */
6306 copy_fp_args (rtx insn)
6311 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6313 int arg_mode, regno;
6314 rtx use = XEXP (link, 0);
6316 if (! (GET_CODE (use) == USE
6317 && GET_CODE (XEXP (use, 0)) == REG
6318 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6321 arg_mode = GET_MODE (XEXP (use, 0));
6322 regno = REGNO (XEXP (use, 0));
6324 /* Is it a floating point register? */
6325 if (regno >= 32 && regno <= 39)
6327 /* Copy the FP register into an integer register via memory. */
6328 if (arg_mode == SFmode)
6330 xoperands[0] = XEXP (use, 0);
6331 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6332 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6333 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6337 xoperands[0] = XEXP (use, 0);
6338 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6339 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6340 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6341 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6347 /* Compute length of the FP argument copy sequence for INSN. */
6349 length_fp_args (rtx insn)
6354 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6356 int arg_mode, regno;
6357 rtx use = XEXP (link, 0);
6359 if (! (GET_CODE (use) == USE
6360 && GET_CODE (XEXP (use, 0)) == REG
6361 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6364 arg_mode = GET_MODE (XEXP (use, 0));
6365 regno = REGNO (XEXP (use, 0));
6367 /* Is it a floating point register? */
6368 if (regno >= 32 && regno <= 39)
6370 if (arg_mode == SFmode)
6380 /* Return the attribute length for the millicode call instruction INSN.
6381 The length must match the code generated by output_millicode_call.
6382 We include the delay slot in the returned length as it is better to
6383 over estimate the length than to under estimate it. */
6386 attr_length_millicode_call (rtx insn)
6388 unsigned long distance = -1;
6389 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6391 if (INSN_ADDRESSES_SET_P ())
6393 distance = (total + insn_current_reference_address (insn));
6394 if (distance < total)
6400 if (!TARGET_LONG_CALLS && distance < 7600000)
6405 else if (TARGET_PORTABLE_RUNTIME)
6409 if (!TARGET_LONG_CALLS && distance < 240000)
6412 if (TARGET_LONG_ABS_CALL && !flag_pic)
6419 /* INSN is a function call. It may have an unconditional jump
6422 CALL_DEST is the routine we are calling. */
6425 output_millicode_call (rtx insn, rtx call_dest)
6427 int attr_length = get_attr_length (insn);
6428 int seq_length = dbr_sequence_length ();
6433 xoperands[0] = call_dest;
6434 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6436 /* Handle the common case where we are sure that the branch will
6437 reach the beginning of the $CODE$ subspace. The within reach
6438 form of the $$sh_func_adrs call has a length of 28. Because
6439 it has an attribute type of multi, it never has a nonzero
6440 sequence length. The length of the $$sh_func_adrs is the same
6441 as certain out of reach PIC calls to other routines. */
6442 if (!TARGET_LONG_CALLS
6443 && ((seq_length == 0
6444 && (attr_length == 12
6445 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6446 || (seq_length != 0 && attr_length == 8)))
6448 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6454 /* It might seem that one insn could be saved by accessing
6455 the millicode function using the linkage table. However,
6456 this doesn't work in shared libraries and other dynamically
6457 loaded objects. Using a pc-relative sequence also avoids
6458 problems related to the implicit use of the gp register. */
6459 output_asm_insn ("b,l .+8,%%r1", xoperands);
6463 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6464 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6468 xoperands[1] = gen_label_rtx ();
6469 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6470 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6471 CODE_LABEL_NUMBER (xoperands[1]));
6472 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6475 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6477 else if (TARGET_PORTABLE_RUNTIME)
6479 /* Pure portable runtime doesn't allow be/ble; we also don't
6480 have PIC support in the assembler/linker, so this sequence
6483 /* Get the address of our target into %r1. */
6484 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6485 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6487 /* Get our return address into %r31. */
6488 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6489 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6491 /* Jump to our target address in %r1. */
6492 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6496 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6498 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6500 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6504 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6505 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6507 if (TARGET_SOM || !TARGET_GAS)
6509 /* The HP assembler can generate relocations for the
6510 difference of two symbols. GAS can do this for a
6511 millicode symbol but not an arbitrary external
6512 symbol when generating SOM output. */
6513 xoperands[1] = gen_label_rtx ();
6514 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6515 CODE_LABEL_NUMBER (xoperands[1]));
6516 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6517 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6521 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6522 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6526 /* Jump to our target address in %r1. */
6527 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6531 if (seq_length == 0)
6532 output_asm_insn ("nop", xoperands);
6534 /* We are done if there isn't a jump in the delay slot. */
6535 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6538 /* This call has an unconditional jump in its delay slot. */
6539 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6541 /* See if the return address can be adjusted. Use the containing
6542 sequence insn's address. */
6543 if (INSN_ADDRESSES_SET_P ())
6545 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6546 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6547 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6549 if (VAL_14_BITS_P (distance))
6551 xoperands[1] = gen_label_rtx ();
6552 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6553 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6554 CODE_LABEL_NUMBER (xoperands[1]));
6557 /* ??? This branch may not reach its target. */
6558 output_asm_insn ("nop\n\tb,n %0", xoperands);
6561 /* ??? This branch may not reach its target. */
6562 output_asm_insn ("nop\n\tb,n %0", xoperands);
6564 /* Delete the jump. */
6565 PUT_CODE (NEXT_INSN (insn), NOTE);
6566 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6567 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6572 /* Return the attribute length of the call instruction INSN. The SIBCALL
6573 flag indicates whether INSN is a regular call or a sibling call. The
6574 length returned must be longer than the code actually generated by
6575 output_call. Since branch shortening is done before delay branch
6576 sequencing, there is no way to determine whether or not the delay
6577 slot will be filled during branch shortening. Even when the delay
6578 slot is filled, we may have to add a nop if the delay slot contains
6579 a branch that can't reach its target. Thus, we always have to include
6580 the delay slot in the length estimate. This used to be done in
6581 pa_adjust_insn_length but we do it here now as some sequences always
6582 fill the delay slot and we can save four bytes in the estimate for
6586 attr_length_call (rtx insn, int sibcall)
6592 rtx pat = PATTERN (insn);
6593 unsigned long distance = -1;
6595 if (INSN_ADDRESSES_SET_P ())
6597 unsigned long total;
6599 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6600 distance = (total + insn_current_reference_address (insn));
6601 if (distance < total)
6605 /* Determine if this is a local call. */
6606 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
6607 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
6609 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
6611 call_decl = SYMBOL_REF_DECL (call_dest);
6612 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
6614 /* pc-relative branch. */
6615 if (!TARGET_LONG_CALLS
6616 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
6617 || distance < 240000))
6620 /* 64-bit plabel sequence. */
6621 else if (TARGET_64BIT && !local_call)
6622 length += sibcall ? 28 : 24;
6624 /* non-pic long absolute branch sequence. */
6625 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6628 /* long pc-relative branch sequence. */
6629 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6630 || (TARGET_64BIT && !TARGET_GAS)
6631 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
6635 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
6639 /* 32-bit plabel sequence. */
6645 length += length_fp_args (insn);
6655 if (!TARGET_NO_SPACE_REGS)
6663 /* INSN is a function call. It may have an unconditional jump
6666 CALL_DEST is the routine we are calling. */
6669 output_call (rtx insn, rtx call_dest, int sibcall)
6671 int delay_insn_deleted = 0;
6672 int delay_slot_filled = 0;
6673 int seq_length = dbr_sequence_length ();
6674 tree call_decl = SYMBOL_REF_DECL (call_dest);
6675 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
6678 xoperands[0] = call_dest;
6680 /* Handle the common case where we're sure that the branch will reach
6681 the beginning of the "$CODE$" subspace. This is the beginning of
6682 the current function if we are in a named section. */
6683 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
6685 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6686 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
6690 if (TARGET_64BIT && !local_call)
6692 /* ??? As far as I can tell, the HP linker doesn't support the
6693 long pc-relative sequence described in the 64-bit runtime
6694 architecture. So, we use a slightly longer indirect call. */
6695 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6697 xoperands[0] = p->internal_label;
6698 xoperands[1] = gen_label_rtx ();
6700 /* If this isn't a sibcall, we put the load of %r27 into the
6701 delay slot. We can't do this in a sibcall as we don't
6702 have a second call-clobbered scratch register available. */
6704 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6707 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6710 /* Now delete the delay insn. */
6711 PUT_CODE (NEXT_INSN (insn), NOTE);
6712 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6713 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6714 delay_insn_deleted = 1;
6717 output_asm_insn ("addil LT'%0,%%r27", xoperands);
6718 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
6719 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
6723 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6724 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
6725 output_asm_insn ("bve (%%r1)", xoperands);
6729 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
6730 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
6731 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6732 delay_slot_filled = 1;
6737 int indirect_call = 0;
6739 /* Emit a long call. There are several different sequences
6740 of increasing length and complexity. In most cases,
6741 they don't allow an instruction in the delay slot. */
6742 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6743 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6744 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
6749 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6751 && (!TARGET_PA_20 || indirect_call))
6753 /* A non-jump insn in the delay slot. By definition we can
6754 emit this insn before the call (and in fact before argument
6756 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6758 /* Now delete the delay insn. */
6759 PUT_CODE (NEXT_INSN (insn), NOTE);
6760 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6761 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6762 delay_insn_deleted = 1;
6765 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6767 /* This is the best sequence for making long calls in
6768 non-pic code. Unfortunately, GNU ld doesn't provide
6769 the stub needed for external calls, and GAS's support
6770 for this with the SOM linker is buggy. It is safe
6771 to use this for local calls. */
6772 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6774 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
6778 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
6781 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6783 output_asm_insn ("copy %%r31,%%r2", xoperands);
6784 delay_slot_filled = 1;
6789 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6790 || (TARGET_64BIT && !TARGET_GAS))
6792 /* The HP assembler and linker can handle relocations
6793 for the difference of two symbols. GAS and the HP
6794 linker can't do this when one of the symbols is
6796 xoperands[1] = gen_label_rtx ();
6797 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6798 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6799 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6800 CODE_LABEL_NUMBER (xoperands[1]));
6801 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6803 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
6805 /* GAS currently can't generate the relocations that
6806 are needed for the SOM linker under HP-UX using this
6807 sequence. The GNU linker doesn't generate the stubs
6808 that are needed for external calls on TARGET_ELF32
6809 with this sequence. For now, we have to use a
6810 longer plabel sequence when using GAS. */
6811 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6812 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
6814 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
6819 /* Emit a long plabel-based call sequence. This is
6820 essentially an inline implementation of $$dyncall.
6821 We don't actually try to call $$dyncall as this is
6822 as difficult as calling the function itself. */
6823 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6825 xoperands[0] = p->internal_label;
6826 xoperands[1] = gen_label_rtx ();
6828 /* Since the call is indirect, FP arguments in registers
6829 need to be copied to the general registers. Then, the
6830 argument relocation stub will copy them back. */
6832 copy_fp_args (insn);
6836 output_asm_insn ("addil LT'%0,%%r19", xoperands);
6837 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
6838 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
6842 output_asm_insn ("addil LR'%0-$global$,%%r27",
6844 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
6848 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
6849 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
6850 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
6851 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6853 if (!sibcall && !TARGET_PA_20)
6855 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
6856 if (TARGET_NO_SPACE_REGS)
6857 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
6859 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
6866 output_asm_insn ("bve (%%r1)", xoperands);
6871 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6872 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
6873 delay_slot_filled = 1;
6876 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6881 if (!TARGET_NO_SPACE_REGS)
6882 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
6887 if (TARGET_NO_SPACE_REGS)
6888 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
6890 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
6894 if (TARGET_NO_SPACE_REGS)
6895 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
6897 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
6900 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
6902 output_asm_insn ("copy %%r31,%%r2", xoperands);
6903 delay_slot_filled = 1;
6910 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
6911 output_asm_insn ("nop", xoperands);
6913 /* We are done if there isn't a jump in the delay slot. */
6915 || delay_insn_deleted
6916 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6919 /* A sibcall should never have a branch in the delay slot. */
6923 /* This call has an unconditional jump in its delay slot. */
6924 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6926 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
6928 /* See if the return address can be adjusted. Use the containing
6929 sequence insn's address. */
6930 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6931 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6932 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6934 if (VAL_14_BITS_P (distance))
6936 xoperands[1] = gen_label_rtx ();
6937 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
6938 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6939 CODE_LABEL_NUMBER (xoperands[1]));
6942 output_asm_insn ("nop\n\tb,n %0", xoperands);
6945 output_asm_insn ("b,n %0", xoperands);
6947 /* Delete the jump. */
6948 PUT_CODE (NEXT_INSN (insn), NOTE);
6949 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6950 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6955 /* Return the attribute length of the indirect call instruction INSN.
6956 The length must match the code generated by output_indirect call.
6957 The returned length includes the delay slot. Currently, the delay
6958 slot of an indirect call sequence is not exposed and it is used by
6959 the sequence itself. */
6962 attr_length_indirect_call (rtx insn)
6964 unsigned long distance = -1;
6965 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6967 if (INSN_ADDRESSES_SET_P ())
6969 distance = (total + insn_current_reference_address (insn));
6970 if (distance < total)
6977 if (TARGET_FAST_INDIRECT_CALLS
6978 || (!TARGET_PORTABLE_RUNTIME
6979 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
6985 if (TARGET_PORTABLE_RUNTIME)
6988 /* Out of reach, can use ble. */
6993 output_indirect_call (rtx insn, rtx call_dest)
6999 xoperands[0] = call_dest;
7000 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7001 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7005 /* First the special case for kernels, level 0 systems, etc. */
7006 if (TARGET_FAST_INDIRECT_CALLS)
7007 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7009 /* Now the normal case -- we can reach $$dyncall directly or
7010 we're sure that we can get there via a long-branch stub.
7012 No need to check target flags as the length uniquely identifies
7013 the remaining cases. */
7014 if (attr_length_indirect_call (insn) == 8)
7015 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7017 /* Long millicode call, but we are not generating PIC or portable runtime
7019 if (attr_length_indirect_call (insn) == 12)
7020 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7022 /* Long millicode call for portable runtime. */
7023 if (attr_length_indirect_call (insn) == 20)
7024 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7026 /* We need a long PIC call to $$dyncall. */
7027 xoperands[0] = NULL_RTX;
7028 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7029 if (TARGET_SOM || !TARGET_GAS)
7031 xoperands[0] = gen_label_rtx ();
7032 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7033 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7034 CODE_LABEL_NUMBER (xoperands[0]));
7035 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7039 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7040 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7043 output_asm_insn ("blr %%r0,%%r2", xoperands);
7044 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7048 /* Return the total length of the save and restore instructions needed for
7049 the data linkage table pointer (i.e., the PIC register) across the call
7050 instruction INSN. No-return calls do not require a save and restore.
7051 In addition, we may be able to avoid the save and restore for calls
7052 within the same translation unit. */
7055 attr_length_save_restore_dltp (rtx insn)
7057 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7063 /* In HPUX 8.0's shared library scheme, special relocations are needed
7064 for function labels if they might be passed to a function
7065 in a shared library (because shared libraries don't live in code
7066 space), and special magic is needed to construct their address. */
7069 hppa_encode_label (rtx sym)
7071 const char *str = XSTR (sym, 0);
7072 int len = strlen (str) + 1;
7075 p = newstr = alloca (len + 1);
7079 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7083 pa_encode_section_info (tree decl, rtx rtl, int first)
7085 if (first && TEXT_SPACE_P (decl))
7087 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7088 if (TREE_CODE (decl) == FUNCTION_DECL)
7089 hppa_encode_label (XEXP (rtl, 0));
7093 /* This is sort of inverse to pa_encode_section_info. */
7096 pa_strip_name_encoding (const char *str)
7098 str += (*str == '@');
7099 str += (*str == '*');
7104 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7106 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7109 /* Returns 1 if OP is a function label involved in a simple addition
7110 with a constant. Used to keep certain patterns from matching
7111 during instruction combination. */
7113 is_function_label_plus_const (rtx op)
7115 /* Strip off any CONST. */
7116 if (GET_CODE (op) == CONST)
7119 return (GET_CODE (op) == PLUS
7120 && function_label_operand (XEXP (op, 0), Pmode)
7121 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7124 /* Output assembly code for a thunk to FUNCTION. */
7127 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7128 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7131 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7132 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7133 int val_14 = VAL_14_BITS_P (delta);
7135 static unsigned int current_thunk_number;
7138 ASM_OUTPUT_LABEL (file, tname);
7139 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7141 fname = (*targetm.strip_name_encoding) (fname);
7142 tname = (*targetm.strip_name_encoding) (tname);
7144 /* Output the thunk. We know that the function is in the same
7145 translation unit (i.e., the same space) as the thunk, and that
7146 thunks are output after their method. Thus, we don't need an
7147 external branch to reach the function. With SOM and GAS,
7148 functions and thunks are effectively in different sections.
7149 Thus, we can always use a IA-relative branch and the linker
7150 will add a long branch stub if necessary.
7152 However, we have to be careful when generating PIC code on the
7153 SOM port to ensure that the sequence does not transfer to an
7154 import stub for the target function as this could clobber the
7155 return value saved at SP-24. This would also apply to the
7156 32-bit linux port if the multi-space model is implemented. */
7157 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7158 && !(flag_pic && TREE_PUBLIC (function))
7159 && (TARGET_GAS || last_address < 262132))
7160 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7161 && ((targetm.have_named_sections
7162 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7163 /* The GNU 64-bit linker has rather poor stub management.
7164 So, we use a long branch from thunks that aren't in
7165 the same section as the target function. */
7167 && (DECL_SECTION_NAME (thunk_fndecl)
7168 != DECL_SECTION_NAME (function)))
7169 || ((DECL_SECTION_NAME (thunk_fndecl)
7170 == DECL_SECTION_NAME (function))
7171 && last_address < 262132)))
7172 || (!targetm.have_named_sections && last_address < 262132))))
7176 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7177 "(%%r26),%%r26\n", fname, delta);
7182 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7184 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7185 "(%%r1),%%r26\n", fname, delta);
7189 else if (TARGET_64BIT)
7191 /* We only have one call-clobbered scratch register, so we can't
7192 make use of the delay slot if delta doesn't fit in 14 bits. */
7194 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7195 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7196 "(%%r1),%%r26\n", delta, delta);
7198 fprintf (file, "\tb,l .+8,%%r1\n");
7202 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7203 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7207 int off = val_14 ? 8 : 16;
7208 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7209 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7214 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7215 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7220 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7224 else if (TARGET_PORTABLE_RUNTIME)
7226 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7227 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7231 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7232 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7237 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7239 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7240 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7244 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7246 /* The function is accessible from outside this module. The only
7247 way to avoid an import stub between the thunk and function is to
7248 call the function directly with an indirect sequence similar to
7249 that used by $$dyncall. This is possible because $$dyncall acts
7250 as the import stub in an indirect call. */
7253 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7254 lab = (*targetm.strip_name_encoding) (label);
7256 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7257 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7258 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7259 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7260 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7261 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7262 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7265 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7271 fprintf (file, "\tbve (%%r22)\n\tldo ");
7276 if (TARGET_NO_SPACE_REGS)
7278 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7283 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7284 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7285 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7291 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7293 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7298 fprintf (file, "\tb,l .+8,%%r1\n");
7300 fprintf (file, "\tbl .+8,%%r1\n");
7302 if (TARGET_SOM || !TARGET_GAS)
7304 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7305 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7309 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7310 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7315 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7316 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7321 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7323 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7324 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7331 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7333 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7334 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7338 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7343 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7348 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7350 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7353 fprintf (file, "\t.align 4\n");
7354 ASM_OUTPUT_LABEL (file, label);
7355 fprintf (file, "\t.word P'%s\n", fname);
7356 function_section (thunk_fndecl);
7359 current_thunk_number++;
7360 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7361 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7362 last_address += nbytes;
7363 update_total_code_bytes (nbytes);
7366 /* Only direct calls to static functions are allowed to be sibling (tail)
7369 This restriction is necessary because some linker generated stubs will
7370 store return pointers into rp' in some cases which might clobber a
7371 live value already in rp'.
7373 In a sibcall the current function and the target function share stack
7374 space. Thus if the path to the current function and the path to the
7375 target function save a value in rp', they save the value into the
7376 same stack slot, which has undesirable consequences.
7378 Because of the deferred binding nature of shared libraries any function
7379 with external scope could be in a different load module and thus require
7380 rp' to be saved when calling that function. So sibcall optimizations
7381 can only be safe for static function.
7383 Note that GCC never needs return value relocations, so we don't have to
7384 worry about static calls with return value relocations (which require
7387 It is safe to perform a sibcall optimization when the target function
7388 will never return. */
7390 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7392 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7393 single subspace mode and the call is not indirect. As far as I know,
7394 there is no operating system support for the multiple subspace mode.
7395 It might be possible to support indirect calls if we didn't use
7396 $$dyncall (see the indirect sequence generated in output_call). */
7398 return (decl != NULL_TREE);
7400 /* Sibcalls are not ok because the arg pointer register is not a fixed
7401 register. This prevents the sibcall optimization from occurring. In
7402 addition, there are problems with stub placement using GNU ld. This
7403 is because a normal sibcall branch uses a 17-bit relocation while
7404 a regular call branch uses a 22-bit relocation. As a result, more
7405 care needs to be taken in the placement of long-branch stubs. */
7410 && !TARGET_PORTABLE_RUNTIME
7411 && !TREE_PUBLIC (decl));
7414 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7415 use in fmpyadd instructions. */
7417 fmpyaddoperands (rtx *operands)
7419 enum machine_mode mode = GET_MODE (operands[0]);
7421 /* Must be a floating point mode. */
7422 if (mode != SFmode && mode != DFmode)
7425 /* All modes must be the same. */
7426 if (! (mode == GET_MODE (operands[1])
7427 && mode == GET_MODE (operands[2])
7428 && mode == GET_MODE (operands[3])
7429 && mode == GET_MODE (operands[4])
7430 && mode == GET_MODE (operands[5])))
7433 /* All operands must be registers. */
7434 if (! (GET_CODE (operands[1]) == REG
7435 && GET_CODE (operands[2]) == REG
7436 && GET_CODE (operands[3]) == REG
7437 && GET_CODE (operands[4]) == REG
7438 && GET_CODE (operands[5]) == REG))
7441 /* Only 2 real operands to the addition. One of the input operands must
7442 be the same as the output operand. */
7443 if (! rtx_equal_p (operands[3], operands[4])
7444 && ! rtx_equal_p (operands[3], operands[5]))
7447 /* Inout operand of add can not conflict with any operands from multiply. */
7448 if (rtx_equal_p (operands[3], operands[0])
7449 || rtx_equal_p (operands[3], operands[1])
7450 || rtx_equal_p (operands[3], operands[2]))
7453 /* multiply can not feed into addition operands. */
7454 if (rtx_equal_p (operands[4], operands[0])
7455 || rtx_equal_p (operands[5], operands[0]))
7458 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7460 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7461 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7462 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7463 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7464 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7465 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7468 /* Passed. Operands are suitable for fmpyadd. */
7472 #if !defined(USE_COLLECT2)
7474 pa_asm_out_constructor (rtx symbol, int priority)
7476 if (!function_label_operand (symbol, VOIDmode))
7477 hppa_encode_label (symbol);
7479 #ifdef CTORS_SECTION_ASM_OP
7480 default_ctor_section_asm_out_constructor (symbol, priority);
7482 # ifdef TARGET_ASM_NAMED_SECTION
7483 default_named_section_asm_out_constructor (symbol, priority);
7485 default_stabs_asm_out_constructor (symbol, priority);
7491 pa_asm_out_destructor (rtx symbol, int priority)
7493 if (!function_label_operand (symbol, VOIDmode))
7494 hppa_encode_label (symbol);
7496 #ifdef DTORS_SECTION_ASM_OP
7497 default_dtor_section_asm_out_destructor (symbol, priority);
7499 # ifdef TARGET_ASM_NAMED_SECTION
7500 default_named_section_asm_out_destructor (symbol, priority);
7502 default_stabs_asm_out_destructor (symbol, priority);
7508 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7509 use in fmpysub instructions. */
7511 fmpysuboperands (rtx *operands)
7513 enum machine_mode mode = GET_MODE (operands[0]);
7515 /* Must be a floating point mode. */
7516 if (mode != SFmode && mode != DFmode)
7519 /* All modes must be the same. */
7520 if (! (mode == GET_MODE (operands[1])
7521 && mode == GET_MODE (operands[2])
7522 && mode == GET_MODE (operands[3])
7523 && mode == GET_MODE (operands[4])
7524 && mode == GET_MODE (operands[5])))
7527 /* All operands must be registers. */
7528 if (! (GET_CODE (operands[1]) == REG
7529 && GET_CODE (operands[2]) == REG
7530 && GET_CODE (operands[3]) == REG
7531 && GET_CODE (operands[4]) == REG
7532 && GET_CODE (operands[5]) == REG))
7535 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
7536 operation, so operands[4] must be the same as operand[3]. */
7537 if (! rtx_equal_p (operands[3], operands[4]))
7540 /* multiply can not feed into subtraction. */
7541 if (rtx_equal_p (operands[5], operands[0]))
7544 /* Inout operand of sub can not conflict with any operands from multiply. */
7545 if (rtx_equal_p (operands[3], operands[0])
7546 || rtx_equal_p (operands[3], operands[1])
7547 || rtx_equal_p (operands[3], operands[2]))
7550 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7552 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7553 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7554 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7555 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7556 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7557 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7560 /* Passed. Operands are suitable for fmpysub. */
7565 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7567 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
7568 || GET_CODE (op) == IOR);
7571 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
7572 constants for shadd instructions. */
7574 shadd_constant_p (int val)
7576 if (val == 2 || val == 4 || val == 8)
7582 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
7583 the valid constant for shadd instructions. */
7585 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7587 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
7590 /* Return 1 if OP is valid as a base register in a reg + reg address. */
7593 basereg_operand (rtx op, enum machine_mode mode)
7595 /* cse will create some unscaled indexed addresses, however; it
7596 generally isn't a win on the PA, so avoid creating unscaled
7597 indexed addresses until after cse is finished. */
7598 if (!cse_not_expected)
7601 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7602 we don't have to worry about the braindamaged implicit space
7603 register selection from the basereg. */
7604 if (TARGET_NO_SPACE_REGS)
7605 return (GET_CODE (op) == REG);
7607 /* While it's always safe to index off the frame pointer, it's not
7608 always profitable, particularly when the frame pointer is being
7610 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
7613 return (GET_CODE (op) == REG
7615 && register_operand (op, mode));
7618 /* Return 1 if this operand is anything other than a hard register. */
7621 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7623 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7626 /* Return 1 if INSN branches forward. Should be using insn_addresses
7627 to avoid walking through all the insns... */
7629 forward_branch_p (rtx insn)
7631 rtx label = JUMP_LABEL (insn);
7638 insn = NEXT_INSN (insn);
7641 return (insn == label);
7644 /* Return 1 if OP is an equality comparison, else return 0. */
7646 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7648 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7651 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
7653 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7655 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7656 || GET_CODE (op) == LT || GET_CODE (op) == GE);
7659 /* Return 1 if INSN is in the delay slot of a call instruction. */
7661 jump_in_call_delay (rtx insn)
7664 if (GET_CODE (insn) != JUMP_INSN)
7667 if (PREV_INSN (insn)
7668 && PREV_INSN (PREV_INSN (insn))
7669 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
7671 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
7673 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
7674 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
7681 /* Output an unconditional move and branch insn. */
7684 output_parallel_movb (rtx *operands, int length)
7686 /* These are the cases in which we win. */
7688 return "mov%I1b,tr %1,%0,%2";
7690 /* None of these cases wins, but they don't lose either. */
7691 if (dbr_sequence_length () == 0)
7693 /* Nothing in the delay slot, fake it by putting the combined
7694 insn (the copy or add) in the delay slot of a bl. */
7695 if (GET_CODE (operands[1]) == CONST_INT)
7696 return "b %2\n\tldi %1,%0";
7698 return "b %2\n\tcopy %1,%0";
7702 /* Something in the delay slot, but we've got a long branch. */
7703 if (GET_CODE (operands[1]) == CONST_INT)
7704 return "ldi %1,%0\n\tb %2";
7706 return "copy %1,%0\n\tb %2";
7710 /* Output an unconditional add and branch insn. */
7713 output_parallel_addb (rtx *operands, int length)
7715 /* To make life easy we want operand0 to be the shared input/output
7716 operand and operand1 to be the readonly operand. */
7717 if (operands[0] == operands[1])
7718 operands[1] = operands[2];
7720 /* These are the cases in which we win. */
7722 return "add%I1b,tr %1,%0,%3";
7724 /* None of these cases win, but they don't lose either. */
7725 if (dbr_sequence_length () == 0)
7727 /* Nothing in the delay slot, fake it by putting the combined
7728 insn (the copy or add) in the delay slot of a bl. */
7729 return "b %3\n\tadd%I1 %1,%0,%0";
7733 /* Something in the delay slot, but we've got a long branch. */
7734 return "add%I1 %1,%0,%0\n\tb %3";
7738 /* Return nonzero if INSN (a jump insn) immediately follows a call
7739 to a named function. This is used to avoid filling the delay slot
7740 of the jump since it can usually be eliminated by modifying RP in
7741 the delay slot of the call. */
7744 following_call (rtx insn)
7746 if (! TARGET_JUMP_IN_DELAY)
7749 /* Find the previous real insn, skipping NOTEs. */
7750 insn = PREV_INSN (insn);
7751 while (insn && GET_CODE (insn) == NOTE)
7752 insn = PREV_INSN (insn);
7754 /* Check for CALL_INSNs and millicode calls. */
7756 && ((GET_CODE (insn) == CALL_INSN
7757 && get_attr_type (insn) != TYPE_DYNCALL)
7758 || (GET_CODE (insn) == INSN
7759 && GET_CODE (PATTERN (insn)) != SEQUENCE
7760 && GET_CODE (PATTERN (insn)) != USE
7761 && GET_CODE (PATTERN (insn)) != CLOBBER
7762 && get_attr_type (insn) == TYPE_MILLI)))
7768 /* We use this hook to perform a PA specific optimization which is difficult
7769 to do in earlier passes.
7771 We want the delay slots of branches within jump tables to be filled.
7772 None of the compiler passes at the moment even has the notion that a
7773 PA jump table doesn't contain addresses, but instead contains actual
7776 Because we actually jump into the table, the addresses of each entry
7777 must stay constant in relation to the beginning of the table (which
7778 itself must stay constant relative to the instruction to jump into
7779 it). I don't believe we can guarantee earlier passes of the compiler
7780 will adhere to those rules.
7782 So, late in the compilation process we find all the jump tables, and
7783 expand them into real code -- eg each entry in the jump table vector
7784 will get an appropriate label followed by a jump to the final target.
7786 Reorg and the final jump pass can then optimize these branches and
7787 fill their delay slots. We end up with smaller, more efficient code.
7789 The jump instructions within the table are special; we must be able
7790 to identify them during assembly output (if the jumps don't get filled
7791 we need to emit a nop rather than nullifying the delay slot)). We
7792 identify jumps in switch tables by marking the SET with DImode.
7794 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
7795 insns. This serves two purposes, first it prevents jump.c from
7796 noticing that the last N entries in the table jump to the instruction
7797 immediately after the table and deleting the jumps. Second, those
7798 insns mark where we should emit .begin_brtab and .end_brtab directives
7799 when using GAS (allows for better link time optimizations). */
7806 remove_useless_addtr_insns (1);
7808 if (pa_cpu < PROCESSOR_8000)
7809 pa_combine_instructions ();
7812 /* This is fairly cheap, so always run it if optimizing. */
7813 if (optimize > 0 && !TARGET_BIG_SWITCH)
7815 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
7816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7818 rtx pattern, tmp, location;
7819 unsigned int length, i;
7821 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
7822 if (GET_CODE (insn) != JUMP_INSN
7823 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7824 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7827 /* Emit marker for the beginning of the branch table. */
7828 emit_insn_before (gen_begin_brtab (), insn);
7830 pattern = PATTERN (insn);
7831 location = PREV_INSN (insn);
7832 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
7834 for (i = 0; i < length; i++)
7836 /* Emit a label before each jump to keep jump.c from
7837 removing this code. */
7838 tmp = gen_label_rtx ();
7839 LABEL_NUSES (tmp) = 1;
7840 emit_label_after (tmp, location);
7841 location = NEXT_INSN (location);
7843 if (GET_CODE (pattern) == ADDR_VEC)
7845 /* Emit the jump itself. */
7846 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
7847 tmp = emit_jump_insn_after (tmp, location);
7848 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
7849 /* It is easy to rely on the branch table markers
7850 during assembly output to trigger the correct code
7851 for a switch table jump with an unfilled delay slot,
7853 However, that requires state and assumes that we look
7856 We can't make such assumptions when computing the length
7857 of instructions. Ugh. We could walk the insn chain to
7858 determine if this instruction is in a branch table, but
7859 that can get rather expensive, particularly during the
7860 branch shortening phase of the compiler.
7862 So instead we mark this jump as being special. This is
7863 far from ideal and knows that no code after this will
7864 muck around with the mode of the JUMP_INSN itself. */
7865 PUT_MODE (tmp, SImode);
7866 LABEL_NUSES (JUMP_LABEL (tmp))++;
7867 location = NEXT_INSN (location);
7871 /* Emit the jump itself. */
7872 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
7873 tmp = emit_jump_insn_after (tmp, location);
7874 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
7875 /* It is easy to rely on the branch table markers
7876 during assembly output to trigger the correct code
7877 for a switch table jump with an unfilled delay slot,
7879 However, that requires state and assumes that we look
7882 We can't make such assumptions when computing the length
7883 of instructions. Ugh. We could walk the insn chain to
7884 determine if this instruction is in a branch table, but
7885 that can get rather expensive, particularly during the
7886 branch shortening phase of the compiler.
7888 So instead we mark this jump as being special. This is
7889 far from ideal and knows that no code after this will
7890 muck around with the mode of the JUMP_INSN itself. */
7891 PUT_MODE (tmp, SImode);
7892 LABEL_NUSES (JUMP_LABEL (tmp))++;
7893 location = NEXT_INSN (location);
7896 /* Emit a BARRIER after the jump. */
7897 emit_barrier_after (location);
7898 location = NEXT_INSN (location);
7901 /* Emit marker for the end of the branch table. */
7902 emit_insn_before (gen_end_brtab (), location);
7903 location = NEXT_INSN (location);
7904 emit_barrier_after (location);
7906 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
7912 /* Sill need an end_brtab insn. */
7913 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7915 /* Find an ADDR_VEC insn. */
7916 if (GET_CODE (insn) != JUMP_INSN
7917 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
7918 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
7921 /* Now generate markers for the beginning and end of the
7923 emit_insn_before (gen_begin_brtab (), insn);
7924 emit_insn_after (gen_end_brtab (), insn);
7929 /* The PA has a number of odd instructions which can perform multiple
7930 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
7931 it may be profitable to combine two instructions into one instruction
7932 with two outputs. It's not profitable PA2.0 machines because the
7933 two outputs would take two slots in the reorder buffers.
7935 This routine finds instructions which can be combined and combines
7936 them. We only support some of the potential combinations, and we
7937 only try common ways to find suitable instructions.
7939 * addb can add two registers or a register and a small integer
7940 and jump to a nearby (+-8k) location. Normally the jump to the
7941 nearby location is conditional on the result of the add, but by
7942 using the "true" condition we can make the jump unconditional.
7943 Thus addb can perform two independent operations in one insn.
7945 * movb is similar to addb in that it can perform a reg->reg
7946 or small immediate->reg copy and jump to a nearby (+-8k location).
7948 * fmpyadd and fmpysub can perform a FP multiply and either an
7949 FP add or FP sub if the operands of the multiply and add/sub are
7950 independent (there are other minor restrictions). Note both
7951 the fmpy and fadd/fsub can in theory move to better spots according
7952 to data dependencies, but for now we require the fmpy stay at a
7955 * Many of the memory operations can perform pre & post updates
7956 of index registers. GCC's pre/post increment/decrement addressing
7957 is far too simple to take advantage of all the possibilities. This
7958 pass may not be suitable since those insns may not be independent.
7960 * comclr can compare two ints or an int and a register, nullify
7961 the following instruction and zero some other register. This
7962 is more difficult to use as it's harder to find an insn which
7963 will generate a comclr than finding something like an unconditional
7964 branch. (conditional moves & long branches create comclr insns).
7966 * Most arithmetic operations can conditionally skip the next
7967 instruction. They can be viewed as "perform this operation
7968 and conditionally jump to this nearby location" (where nearby
7969 is an insns away). These are difficult to use due to the
7970 branch length restrictions. */
7973 pa_combine_instructions (void)
7977 /* This can get expensive since the basic algorithm is on the
7978 order of O(n^2) (or worse). Only do it for -O2 or higher
7979 levels of optimization. */
7983 /* Walk down the list of insns looking for "anchor" insns which
7984 may be combined with "floating" insns. As the name implies,
7985 "anchor" instructions don't move, while "floating" insns may
7987 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7988 new = make_insn_raw (new);
7990 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7992 enum attr_pa_combine_type anchor_attr;
7993 enum attr_pa_combine_type floater_attr;
7995 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7996 Also ignore any special USE insns. */
7997 if ((GET_CODE (anchor) != INSN
7998 && GET_CODE (anchor) != JUMP_INSN
7999 && GET_CODE (anchor) != CALL_INSN)
8000 || GET_CODE (PATTERN (anchor)) == USE
8001 || GET_CODE (PATTERN (anchor)) == CLOBBER
8002 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8003 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8006 anchor_attr = get_attr_pa_combine_type (anchor);
8007 /* See if anchor is an insn suitable for combination. */
8008 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8009 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8010 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8011 && ! forward_branch_p (anchor)))
8015 for (floater = PREV_INSN (anchor);
8017 floater = PREV_INSN (floater))
8019 if (GET_CODE (floater) == NOTE
8020 || (GET_CODE (floater) == INSN
8021 && (GET_CODE (PATTERN (floater)) == USE
8022 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8025 /* Anything except a regular INSN will stop our search. */
8026 if (GET_CODE (floater) != INSN
8027 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8028 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8034 /* See if FLOATER is suitable for combination with the
8036 floater_attr = get_attr_pa_combine_type (floater);
8037 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8038 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8039 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8040 && floater_attr == PA_COMBINE_TYPE_FMPY))
8042 /* If ANCHOR and FLOATER can be combined, then we're
8043 done with this pass. */
8044 if (pa_can_combine_p (new, anchor, floater, 0,
8045 SET_DEST (PATTERN (floater)),
8046 XEXP (SET_SRC (PATTERN (floater)), 0),
8047 XEXP (SET_SRC (PATTERN (floater)), 1)))
8051 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8052 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8054 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8056 if (pa_can_combine_p (new, anchor, floater, 0,
8057 SET_DEST (PATTERN (floater)),
8058 XEXP (SET_SRC (PATTERN (floater)), 0),
8059 XEXP (SET_SRC (PATTERN (floater)), 1)))
8064 if (pa_can_combine_p (new, anchor, floater, 0,
8065 SET_DEST (PATTERN (floater)),
8066 SET_SRC (PATTERN (floater)),
8067 SET_SRC (PATTERN (floater))))
8073 /* If we didn't find anything on the backwards scan try forwards. */
8075 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8076 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8078 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8080 if (GET_CODE (floater) == NOTE
8081 || (GET_CODE (floater) == INSN
8082 && (GET_CODE (PATTERN (floater)) == USE
8083 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8087 /* Anything except a regular INSN will stop our search. */
8088 if (GET_CODE (floater) != INSN
8089 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8090 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8096 /* See if FLOATER is suitable for combination with the
8098 floater_attr = get_attr_pa_combine_type (floater);
8099 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8100 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8101 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8102 && floater_attr == PA_COMBINE_TYPE_FMPY))
8104 /* If ANCHOR and FLOATER can be combined, then we're
8105 done with this pass. */
8106 if (pa_can_combine_p (new, anchor, floater, 1,
8107 SET_DEST (PATTERN (floater)),
8108 XEXP (SET_SRC (PATTERN (floater)),
8110 XEXP (SET_SRC (PATTERN (floater)),
8117 /* FLOATER will be nonzero if we found a suitable floating
8118 insn for combination with ANCHOR. */
8120 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8121 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8123 /* Emit the new instruction and delete the old anchor. */
8124 emit_insn_before (gen_rtx_PARALLEL
8126 gen_rtvec (2, PATTERN (anchor),
8127 PATTERN (floater))),
8130 PUT_CODE (anchor, NOTE);
8131 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8132 NOTE_SOURCE_FILE (anchor) = 0;
8134 /* Emit a special USE insn for FLOATER, then delete
8135 the floating insn. */
8136 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8137 delete_insn (floater);
8142 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8145 /* Emit the new_jump instruction and delete the old anchor. */
8147 = emit_jump_insn_before (gen_rtx_PARALLEL
8149 gen_rtvec (2, PATTERN (anchor),
8150 PATTERN (floater))),
8153 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8154 PUT_CODE (anchor, NOTE);
8155 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8156 NOTE_SOURCE_FILE (anchor) = 0;
8158 /* Emit a special USE insn for FLOATER, then delete
8159 the floating insn. */
8160 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8161 delete_insn (floater);
8169 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8172 int insn_code_number;
8175 /* Create a PARALLEL with the patterns of ANCHOR and
8176 FLOATER, try to recognize it, then test constraints
8177 for the resulting pattern.
8179 If the pattern doesn't match or the constraints
8180 aren't met keep searching for a suitable floater
8182 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8183 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8184 INSN_CODE (new) = -1;
8185 insn_code_number = recog_memoized (new);
8186 if (insn_code_number < 0
8187 || (extract_insn (new), ! constrain_operands (1)))
8201 /* There's up to three operands to consider. One
8202 output and two inputs.
8204 The output must not be used between FLOATER & ANCHOR
8205 exclusive. The inputs must not be set between
8206 FLOATER and ANCHOR exclusive. */
8208 if (reg_used_between_p (dest, start, end))
8211 if (reg_set_between_p (src1, start, end))
8214 if (reg_set_between_p (src2, start, end))
8217 /* If we get here, then everything is good. */
8221 /* Return nonzero if references for INSN are delayed.
8223 Millicode insns are actually function calls with some special
8224 constraints on arguments and register usage.
8226 Millicode calls always expect their arguments in the integer argument
8227 registers, and always return their result in %r29 (ret1). They
8228 are expected to clobber their arguments, %r1, %r29, and the return
8229 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8231 This function tells reorg that the references to arguments and
8232 millicode calls do not appear to happen until after the millicode call.
8233 This allows reorg to put insns which set the argument registers into the
8234 delay slot of the millicode call -- thus they act more like traditional
8237 Note we can not consider side effects of the insn to be delayed because
8238 the branch and link insn will clobber the return pointer. If we happened
8239 to use the return pointer in the delay slot of the call, then we lose.
8241 get_attr_type will try to recognize the given insn, so make sure to
8242 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8245 insn_refs_are_delayed (rtx insn)
8247 return ((GET_CODE (insn) == INSN
8248 && GET_CODE (PATTERN (insn)) != SEQUENCE
8249 && GET_CODE (PATTERN (insn)) != USE
8250 && GET_CODE (PATTERN (insn)) != CLOBBER
8251 && get_attr_type (insn) == TYPE_MILLI));
8254 /* On the HP-PA the value is found in register(s) 28(-29), unless
8255 the mode is SF or DF. Then the value is returned in fr4 (32).
8257 This must perform the same promotions as PROMOTE_MODE, else
8258 PROMOTE_FUNCTION_RETURN will not work correctly.
8260 Small structures must be returned in a PARALLEL on PA64 in order
8261 to match the HP Compiler ABI. */
8264 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8266 enum machine_mode valmode;
8268 /* Aggregates with a size less than or equal to 128 bits are returned
8269 in GR 28(-29). They are left justified. The pad bits are undefined.
8270 Larger aggregates are returned in memory. */
8271 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8275 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8277 for (i = 0; i < ub; i++)
8279 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8280 gen_rtx_REG (DImode, 28 + i),
8285 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8288 if ((INTEGRAL_TYPE_P (valtype)
8289 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8290 || POINTER_TYPE_P (valtype))
8291 valmode = word_mode;
8293 valmode = TYPE_MODE (valtype);
8295 if (TREE_CODE (valtype) == REAL_TYPE
8296 && TYPE_MODE (valtype) != TFmode
8297 && !TARGET_SOFT_FLOAT)
8298 return gen_rtx_REG (valmode, 32);
8300 return gen_rtx_REG (valmode, 28);
8303 /* Return the location of a parameter that is passed in a register or NULL
8304 if the parameter has any component that is passed in memory.
8306 This is new code and will be pushed to into the net sources after
8309 ??? We might want to restructure this so that it looks more like other
8312 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8313 int named ATTRIBUTE_UNUSED)
8315 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8322 if (mode == VOIDmode)
8325 arg_size = FUNCTION_ARG_SIZE (mode, type);
8327 /* If this arg would be passed partially or totally on the stack, then
8328 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8329 handle arguments which are split between regs and stack slots if
8330 the ABI mandates split arguments. */
8333 /* The 32-bit ABI does not split arguments. */
8334 if (cum->words + arg_size > max_arg_words)
8340 alignment = cum->words & 1;
8341 if (cum->words + alignment >= max_arg_words)
8345 /* The 32bit ABIs and the 64bit ABIs are rather different,
8346 particularly in their handling of FP registers. We might
8347 be able to cleverly share code between them, but I'm not
8348 going to bother in the hope that splitting them up results
8349 in code that is more easily understood. */
8353 /* Advance the base registers to their current locations.
8355 Remember, gprs grow towards smaller register numbers while
8356 fprs grow to higher register numbers. Also remember that
8357 although FP regs are 32-bit addressable, we pretend that
8358 the registers are 64-bits wide. */
8359 gpr_reg_base = 26 - cum->words;
8360 fpr_reg_base = 32 + cum->words;
8362 /* Arguments wider than one word and small aggregates need special
8366 || (type && AGGREGATE_TYPE_P (type)))
8368 /* Double-extended precision (80-bit), quad-precision (128-bit)
8369 and aggregates including complex numbers are aligned on
8370 128-bit boundaries. The first eight 64-bit argument slots
8371 are associated one-to-one, with general registers r26
8372 through r19, and also with floating-point registers fr4
8373 through fr11. Arguments larger than one word are always
8374 passed in general registers.
8376 Using a PARALLEL with a word mode register results in left
8377 justified data on a big-endian target. */
8380 int i, offset = 0, ub = arg_size;
8382 /* Align the base register. */
8383 gpr_reg_base -= alignment;
8385 ub = MIN (ub, max_arg_words - cum->words - alignment);
8386 for (i = 0; i < ub; i++)
8388 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8389 gen_rtx_REG (DImode, gpr_reg_base),
8395 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8400 /* If the argument is larger than a word, then we know precisely
8401 which registers we must use. */
8415 /* Structures 5 to 8 bytes in size are passed in the general
8416 registers in the same manner as other non floating-point
8417 objects. The data is right-justified and zero-extended
8420 This is magic. Normally, using a PARALLEL results in left
8421 justified data on a big-endian target. However, using a
8422 single double-word register provides the required right
8423 justification for 5 to 8 byte structures. This has nothing
8424 to do with the direction of padding specified for the argument.
8425 It has to do with how the data is widened and shifted into
8426 and from the register.
8428 Aside from adding load_multiple and store_multiple patterns,
8429 this is the only way that I have found to obtain right
8430 justification of BLKmode data when it has a size greater
8431 than one word. Splitting the operation into two SImode loads
8432 or returning a DImode REG results in left justified data. */
8433 if (mode == BLKmode)
8435 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8436 gen_rtx_REG (DImode, gpr_reg_base),
8438 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8443 /* We have a single word (32 bits). A simple computation
8444 will get us the register #s we need. */
8445 gpr_reg_base = 26 - cum->words;
8446 fpr_reg_base = 32 + 2 * cum->words;
8450 /* Determine if the argument needs to be passed in both general and
8451 floating point registers. */
8452 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8453 /* If we are doing soft-float with portable runtime, then there
8454 is no need to worry about FP regs. */
8455 && !TARGET_SOFT_FLOAT
8456 /* The parameter must be some kind of float, else we can just
8457 pass it in integer registers. */
8458 && FLOAT_MODE_P (mode)
8459 /* The target function must not have a prototype. */
8460 && cum->nargs_prototype <= 0
8461 /* libcalls do not need to pass items in both FP and general
8463 && type != NULL_TREE
8464 /* All this hair applies to "outgoing" args only. This includes
8465 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8467 /* Also pass outgoing floating arguments in both registers in indirect
8468 calls with the 32 bit ABI and the HP assembler since there is no
8469 way to the specify argument locations in static functions. */
8474 && FLOAT_MODE_P (mode)))
8480 gen_rtx_EXPR_LIST (VOIDmode,
8481 gen_rtx_REG (mode, fpr_reg_base),
8483 gen_rtx_EXPR_LIST (VOIDmode,
8484 gen_rtx_REG (mode, gpr_reg_base),
8489 /* See if we should pass this parameter in a general register. */
8490 if (TARGET_SOFT_FLOAT
8491 /* Indirect calls in the normal 32bit ABI require all arguments
8492 to be passed in general registers. */
8493 || (!TARGET_PORTABLE_RUNTIME
8497 /* If the parameter is not a floating point parameter, then
8498 it belongs in GPRs. */
8499 || !FLOAT_MODE_P (mode))
8500 retval = gen_rtx_REG (mode, gpr_reg_base);
8502 retval = gen_rtx_REG (mode, fpr_reg_base);
8508 /* If this arg would be passed totally in registers or totally on the stack,
8509 then this routine should return zero. It is currently called only for
8510 the 64-bit target. */
8512 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8513 tree type, int named ATTRIBUTE_UNUSED)
8515 unsigned int max_arg_words = 8;
8516 unsigned int offset = 0;
8518 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
8521 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
8522 /* Arg fits fully into registers. */
8524 else if (cum->words + offset >= max_arg_words)
8525 /* Arg fully on the stack. */
8529 return max_arg_words - cum->words - offset;
8533 /* Return 1 if this is a comparison operator. This allows the use of
8534 MATCH_OPERATOR to recognize all the branch insns. */
8537 cmpib_comparison_operator (rtx op, enum machine_mode mode)
8539 return ((mode == VOIDmode || GET_MODE (op) == mode)
8540 && (GET_CODE (op) == EQ
8541 || GET_CODE (op) == NE
8542 || GET_CODE (op) == GT
8543 || GET_CODE (op) == GTU
8544 || GET_CODE (op) == GE
8545 || GET_CODE (op) == LT
8546 || GET_CODE (op) == LE
8547 || GET_CODE (op) == LEU));
8550 /* On hpux10, the linker will give an error if we have a reference
8551 in the read-only data section to a symbol defined in a shared
8552 library. Therefore, expressions that might require a reloc can
8553 not be placed in the read-only data section. */
8556 pa_select_section (exp, reloc, align)
8559 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
8561 if (TREE_CODE (exp) == VAR_DECL
8562 && TREE_READONLY (exp)
8563 && !TREE_THIS_VOLATILE (exp)
8564 && DECL_INITIAL (exp)
8565 && (DECL_INITIAL (exp) == error_mark_node
8566 || TREE_CONSTANT (DECL_INITIAL (exp)))
8568 readonly_data_section ();
8569 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
8570 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
8572 readonly_data_section ();
8578 pa_globalize_label (FILE *stream, const char *name)
8580 /* We only handle DATA objects here, functions are globalized in
8581 ASM_DECLARE_FUNCTION_NAME. */
8582 if (! FUNCTION_NAME_P (name))
8584 fputs ("\t.EXPORT ", stream);
8585 assemble_name (stream, name);
8586 fputs (",DATA\n", stream);