1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
42 #include "integrate.h"
51 #include "target-def.h"
/* Tell the scheduler to use the DFA pipeline description for PA.
   NOTE(review): this chunk appears to have lines elided (the function's
   return type, braces, and `return 1;` body are missing); text kept
   verbatim.  */
53 static int hppa_use_dfa_pipeline_interface PARAMS ((void));
55 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
56 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
59 hppa_use_dfa_pipeline_interface ()
64 /* Return nonzero if there is a bypass for the output of
65 OUT_INSN and the fp store IN_INSN. */
67 hppa_fpstore_bypass_p (out_insn, in_insn)
68 rtx out_insn, in_insn;
/* Mode of the value being stored and of the producing insn's source.  */
70 enum machine_mode store_mode;
71 enum machine_mode other_mode;
/* Bail out unless IN_INSN is a recognized fp store and OUT_INSN is
   recognized; get_attr_type requires a valid insn code.
   NOTE(review): the early-return and closing braces appear elided from
   this chunk; code kept verbatim.  */
74 if (recog_memoized (in_insn) < 0
75 || get_attr_type (in_insn) != TYPE_FPSTORE
76 || recog_memoized (out_insn) < 0)
79 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
/* The bypass only applies when OUT_INSN is a single set.  */
81 set = single_set (out_insn);
85 other_mode = GET_MODE (SET_SRC (set));
/* The bypass exists when both values have the same size.  */
87 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
91 #ifndef DO_FRAME_NOTES
92 #ifdef INCOMING_RETURN_ADDR_RTX
93 #define DO_FRAME_NOTES 1
95 #define DO_FRAME_NOTES 0
99 static int hppa_address_cost PARAMS ((rtx));
100 static bool hppa_rtx_costs PARAMS ((rtx, int, int, int *));
101 static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
102 static void pa_reorg PARAMS ((void));
103 static void pa_combine_instructions PARAMS ((void));
104 static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
105 static int forward_branch_p PARAMS ((rtx));
106 static int shadd_constant_p PARAMS ((int));
107 static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
108 static int compute_movstrsi_length PARAMS ((rtx));
109 static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
110 static void remove_useless_addtr_insns PARAMS ((int));
111 static void store_reg PARAMS ((int, int, int));
112 static void store_reg_modify PARAMS ((int, int, int));
113 static void load_reg PARAMS ((int, int, int));
114 static void set_reg_plus_d PARAMS ((int, int, int, int));
115 static void pa_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
116 static void update_total_code_bytes PARAMS ((int));
117 static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
118 static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
119 static int pa_adjust_priority PARAMS ((rtx, int));
120 static int pa_issue_rate PARAMS ((void));
121 static void pa_select_section PARAMS ((tree, int, unsigned HOST_WIDE_INT))
123 static void pa_encode_section_info PARAMS ((tree, rtx, int));
124 static const char *pa_strip_name_encoding PARAMS ((const char *));
125 static bool pa_function_ok_for_sibcall PARAMS ((tree, tree));
126 static void pa_globalize_label PARAMS ((FILE *, const char *))
128 static void pa_asm_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
129 HOST_WIDE_INT, tree));
130 #if !defined(USE_COLLECT2)
131 static void pa_asm_out_constructor PARAMS ((rtx, int));
132 static void pa_asm_out_destructor PARAMS ((rtx, int));
134 static void pa_init_builtins PARAMS ((void));
135 static void copy_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
136 static int length_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
137 static struct deferred_plabel *get_plabel PARAMS ((const char *))
139 static inline void pa_file_start_level PARAMS ((void)) ATTRIBUTE_UNUSED;
140 static inline void pa_file_start_space PARAMS ((int)) ATTRIBUTE_UNUSED;
141 static inline void pa_file_start_file PARAMS ((int)) ATTRIBUTE_UNUSED;
142 static inline void pa_file_start_mcount PARAMS ((const char*)) ATTRIBUTE_UNUSED;
143 static void pa_elf_file_start PARAMS ((void)) ATTRIBUTE_UNUSED;
144 static void pa_som_file_start PARAMS ((void)) ATTRIBUTE_UNUSED;
145 static void pa_linux_file_start PARAMS ((void)) ATTRIBUTE_UNUSED;
146 static void pa_hpux64_gas_file_start PARAMS ((void)) ATTRIBUTE_UNUSED;
147 static void pa_hpux64_hpas_file_start PARAMS ((void)) ATTRIBUTE_UNUSED;
148 static void output_deferred_plabels PARAMS ((void));
150 /* Save the operands last given to a compare for use when we
151 generate a scc or bcc insn. */
152 rtx hppa_compare_op0, hppa_compare_op1;
153 enum cmp_type hppa_branch_type;
155 /* Which cpu we are scheduling for. */
156 enum processor_type pa_cpu;
158 /* String to hold which cpu we are scheduling for. */
159 const char *pa_cpu_string;
161 /* Which architecture we are generating code for. */
162 enum architecture_type pa_arch;
164 /* String to hold which architecture we are generating code for. */
165 const char *pa_arch_string;
167 /* Counts for the number of callee-saved general and floating point
168 registers which were saved by the current function's prologue. */
169 static int gr_saved, fr_saved;
/* Forward declaration; defined later in this file.  */
171 static rtx find_addr_reg PARAMS ((rtx));
173 /* Keep track of the number of bytes we have output in the CODE subspace
174 during this compilation so we'll know when to emit inline long-calls. */
175 unsigned long total_code_bytes;
177 /* The last address of the previous function plus the number of bytes in
178 associated thunks that have been output. This is used to determine if
179 a thunk can use an IA-relative branch to reach its target function. */
180 static int last_address;
182 /* Variables to handle plabels that we discover are necessary at assembly
183 output time. They are output after the current function. */
/* NOTE(review): the struct's member list appears elided from this chunk;
   declarations kept verbatim.  GTY markers make these roots for the
   garbage-collected precompiled-header machinery.  */
184 struct deferred_plabel GTY(())
189 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
191 static size_t n_deferred_plabels = 0;
194 /* Initialize the GCC target structure. */
196 #undef TARGET_ASM_ALIGNED_HI_OP
197 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
198 #undef TARGET_ASM_ALIGNED_SI_OP
199 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
200 #undef TARGET_ASM_ALIGNED_DI_OP
201 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
202 #undef TARGET_ASM_UNALIGNED_HI_OP
203 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
204 #undef TARGET_ASM_UNALIGNED_SI_OP
205 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
206 #undef TARGET_ASM_UNALIGNED_DI_OP
207 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
208 #undef TARGET_ASM_INTEGER
209 #define TARGET_ASM_INTEGER pa_assemble_integer
211 #undef TARGET_ASM_FUNCTION_PROLOGUE
212 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
213 #undef TARGET_ASM_FUNCTION_EPILOGUE
214 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
216 #undef TARGET_SCHED_ADJUST_COST
217 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
218 #undef TARGET_SCHED_ADJUST_PRIORITY
219 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
220 #undef TARGET_SCHED_ISSUE_RATE
221 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
223 #undef TARGET_ENCODE_SECTION_INFO
224 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
225 #undef TARGET_STRIP_NAME_ENCODING
226 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
228 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
229 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
231 #undef TARGET_ASM_OUTPUT_MI_THUNK
232 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
233 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
234 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
236 #undef TARGET_ASM_FILE_END
237 #define TARGET_ASM_FILE_END output_deferred_plabels
239 #if !defined(USE_COLLECT2)
240 #undef TARGET_ASM_CONSTRUCTOR
241 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
242 #undef TARGET_ASM_DESTRUCTOR
243 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
246 #undef TARGET_INIT_BUILTINS
247 #define TARGET_INIT_BUILTINS pa_init_builtins
249 #undef TARGET_RTX_COSTS
250 #define TARGET_RTX_COSTS hppa_rtx_costs
251 #undef TARGET_ADDRESS_COST
252 #define TARGET_ADDRESS_COST hppa_address_cost
254 #undef TARGET_MACHINE_DEPENDENT_REORG
255 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
257 struct gcc_target targetm = TARGET_INITIALIZER;
/* Process -mschedule=, -march= and related option interactions.
   NOTE(review): the function header (presumably override_options) and
   many braces appear elided from this chunk; code kept verbatim.  */
/* Default the scheduling model when none was given on the command line.  */
262 if (pa_cpu_string == NULL)
263 pa_cpu_string = TARGET_SCHED_DEFAULT;
/* Map the -mschedule= string onto a processor_type; the string is
   canonicalized at the same time.  */
265 if (! strcmp (pa_cpu_string, "8000"))
267 pa_cpu_string = "8000";
268 pa_cpu = PROCESSOR_8000;
270 else if (! strcmp (pa_cpu_string, "7100"))
272 pa_cpu_string = "7100";
273 pa_cpu = PROCESSOR_7100;
275 else if (! strcmp (pa_cpu_string, "700"))
277 pa_cpu_string = "700";
278 pa_cpu = PROCESSOR_700;
280 else if (! strcmp (pa_cpu_string, "7100LC"))
282 pa_cpu_string = "7100LC";
283 pa_cpu = PROCESSOR_7100LC;
285 else if (! strcmp (pa_cpu_string, "7200"))
287 pa_cpu_string = "7200";
288 pa_cpu = PROCESSOR_7200;
290 else if (! strcmp (pa_cpu_string, "7300"))
292 pa_cpu_string = "7300";
293 pa_cpu = PROCESSOR_7300;
/* Unrecognized -mschedule= value: warn and (presumably) keep the default.  */
297 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
300 /* Set the instruction set architecture. */
301 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
303 pa_arch_string = "1.0";
304 pa_arch = ARCHITECTURE_10;
305 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
307 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
309 pa_arch_string = "1.1";
310 pa_arch = ARCHITECTURE_11;
311 target_flags &= ~MASK_PA_20;
312 target_flags |= MASK_PA_11;
314 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
316 pa_arch_string = "2.0";
317 pa_arch = ARCHITECTURE_20;
318 target_flags |= MASK_PA_11 | MASK_PA_20;
320 else if (pa_arch_string)
322 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
325 /* Unconditional branches in the delay slot are not compatible with dwarf2
326 call frame information. There is no benefit in using this optimization
327 on PA8000 and later processors. */
328 if (pa_cpu >= PROCESSOR_8000
329 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
330 || flag_unwind_tables)
331 target_flags &= ~MASK_JUMP_IN_DELAY;
/* Diagnose option combinations that cannot work on this target.  */
333 if (flag_pic && TARGET_PORTABLE_RUNTIME)
335 warning ("PIC code generation is not supported in the portable runtime model\n");
338 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
340 warning ("PIC code generation is not compatible with fast indirect calls\n");
/* Debug info requires GAS on this target; force -g off otherwise.  */
343 if (! TARGET_GAS && write_symbols != NO_DEBUG)
345 warning ("-g is only supported when using GAS on this processor,");
346 warning ("-g option disabled");
347 write_symbols = NO_DEBUG;
350 /* We only support the "big PIC" model now. And we always generate PIC
351 code when in 64bit mode. */
/* NOTE(review): the statement promoting flag_pic appears elided here.  */
352 if (flag_pic == 1 || TARGET_64BIT)
355 /* We can't guarantee that .dword is available for 32-bit targets. */
356 if (UNITS_PER_WORD == 4)
357 targetm.asm_out.aligned_op.di = NULL;
359 /* The unaligned ops are only available when using GAS. */
362 targetm.asm_out.unaligned_op.hi = NULL;
363 targetm.asm_out.unaligned_op.si = NULL;
364 targetm.asm_out.unaligned_op.di = NULL;
/* Body fragment of pa_init_builtins (header elided in this chunk):
   drop the fputc_unlocked builtin on hosts that lack it.  */
371 #ifdef DONT_HAVE_FPUTC_UNLOCKED
372 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
373 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
377 /* Return nonzero only if OP is a register of mode MODE,
/* ...or the constant zero of MODE (a copy from reg %r0 on PA).  */
380 reg_or_0_operand (op, mode)
382 enum machine_mode mode;
384 return (op == CONST0_RTX (mode) || register_operand (op, mode));
387 /* Return nonzero if OP is suitable for use in a call to a named
390 For 2.5 try to eliminate either call_operand_address or
391 function_label_operand, they perform very similar functions. */
393 call_operand_address (op, mode)
395 enum machine_mode mode ATTRIBUTE_UNUSED;
/* A word-mode constant address, but not under the portable runtime
   (which must call through plabels).  */
397 return (GET_MODE (op) == word_mode
398 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
401 /* Return 1 if X contains a symbolic expression. We know these
402 expressions will have one of a few well defined forms, so
403 we need only check those forms. */
405 symbolic_expression_p (x)
409 /* Strip off any HIGH. */
/* NOTE(review): the line replacing X with XEXP (x, 0) appears elided.  */
410 if (GET_CODE (x) == HIGH)
413 return (symbolic_operand (x, VOIDmode));
/* Return 1 if OP is a SYMBOL_REF, LABEL_REF, or a CONST wrapping
   (symbol-or-label + const_int).  Other codes return 0 via the
   switch's (elided) default.  */
417 symbolic_operand (op, mode)
419 enum machine_mode mode ATTRIBUTE_UNUSED;
421 switch (GET_CODE (op))
/* CONST case: accept only symbol/label plus integer offset.  */
428 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
429 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
430 && GET_CODE (XEXP (op, 1)) == CONST_INT);
436 /* Return truth value of statement that OP is a symbolic memory
437 operand of mode MODE. */
440 symbolic_memory_operand (op, mode)
442 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Look through a SUBREG to the underlying MEM, then test whether the
   address is symbolic.  */
444 if (GET_CODE (op) == SUBREG)
445 op = SUBREG_REG (op);
446 if (GET_CODE (op) != MEM)
/* NOTE(review): the `op = XEXP (op, 0);` address extraction appears
   elided between these lines.  */
449 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
450 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
453 /* Return 1 if the operand is either a register or a memory operand that is
/* ...not symbolic (symbolic addresses need special handling on PA).  */
457 reg_or_nonsymb_mem_operand (op, mode)
459 enum machine_mode mode;
461 if (register_operand (op, mode))
464 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
470 /* Return 1 if the operand is either a register, zero, or a memory operand
471 that is not symbolic. */
474 reg_or_0_or_nonsymb_mem_operand (op, mode)
476 enum machine_mode mode;
/* Each accepted form returns 1; fall-through (elided) returns 0.  */
478 if (register_operand (op, mode))
481 if (op == CONST0_RTX (mode))
484 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
490 /* Return 1 if the operand is a register operand or a non-symbolic memory
491 operand after reload. This predicate is used for branch patterns that
492 internally handle register reloading. We need to accept non-symbolic
493 memory operands after reload to ensure that the pattern is still valid
494 if reload didn't find a hard register for the operand. */
497 reg_before_reload_operand (op, mode)
499 enum machine_mode mode;
501 /* Don't accept a SUBREG since it will need a reload. */
502 if (GET_CODE (op) == SUBREG)
505 if (register_operand (op, mode))
/* NOTE(review): the reload_completed test that guards this memory case
   appears elided just above line 509.  */
509 && memory_operand (op, mode)
510 && ! symbolic_memory_operand (op, mode))
516 /* Accept any constant that can be moved in one instruction into a
/* ...general register: 'J' = 14-bit signed (ldo), 'N' = ldil immediates,
   'K' = constants constructible with zdepi.  */
519 cint_ok_for_move (intval)
520 HOST_WIDE_INT intval;
522 /* OK if ldo, ldil, or zdepi, can be used. */
523 return (CONST_OK_FOR_LETTER_P (intval, 'J')
524 || CONST_OK_FOR_LETTER_P (intval, 'N')
525 || CONST_OK_FOR_LETTER_P (intval, 'K'));
528 /* Accept anything that can be moved in one instruction into a general
/* ...register: registers, one-insn constants, and non-symbolic memory
   addresses (including LO_SUM DLT references and scaled-index forms).
   NOTE(review): several early-return and brace lines appear elided from
   this chunk; code kept verbatim.  */
531 move_operand (op, mode)
533 enum machine_mode mode;
535 if (register_operand (op, mode))
/* CONSTANT_P_RTX is the placeholder for __builtin_constant_p.  */
538 if (GET_CODE (op) == CONSTANT_P_RTX)
541 if (GET_CODE (op) == CONST_INT)
542 return cint_ok_for_move (INTVAL (op));
/* From here on we examine the memory address itself.  */
544 if (GET_CODE (op) == SUBREG)
545 op = SUBREG_REG (op);
546 if (GET_CODE (op) != MEM)
551 /* We consider a LO_SUM DLT reference a move_operand now since it has
552 been merged into the normal movsi/movdi patterns. */
553 if (GET_CODE (op) == LO_SUM
554 && GET_CODE (XEXP (op, 0)) == REG
555 && REG_OK_FOR_BASE_P (XEXP (op, 0))
556 && GET_CODE (XEXP (op, 1)) == UNSPEC
557 && GET_MODE (op) == Pmode)
560 /* Since move_operand is only used for source operands, we can always
561 allow scaled indexing! */
/* Accept (plus (mult reg size) reg) in either operand order, where the
   multiplier equals the mode size (hardware scaled-index form).  */
562 if (! TARGET_DISABLE_INDEXING
563 && GET_CODE (op) == PLUS
564 && ((GET_CODE (XEXP (op, 0)) == MULT
565 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
566 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
567 && INTVAL (XEXP (XEXP (op, 0), 1))
568 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
569 && GET_CODE (XEXP (op, 1)) == REG)
570 || (GET_CODE (XEXP (op, 1)) == MULT
571 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
572 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
573 && INTVAL (XEXP (XEXP (op, 1), 1))
574 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
575 && GET_CODE (XEXP (op, 0)) == REG)))
/* Otherwise defer to the generic legitimate-address test.  */
578 return memory_address_p (mode, op);
581 /* Accept REG and any CONST_INT that can be moved in one instruction into a
/* ...general register.  */
584 reg_or_cint_move_operand (op, mode)
586 enum machine_mode mode;
588 if (register_operand (op, mode))
591 if (GET_CODE (op) == CONST_INT)
592 return cint_ok_for_move (INTVAL (op));
/* Return 1 if OP is a label or a CONST (label + const_int) that needs
   the special PIC label-load sequence.  NOTE(review): a flag_pic guard
   and the switch's other cases appear elided from this chunk.  */
598 pic_label_operand (op, mode)
600 enum machine_mode mode ATTRIBUTE_UNUSED;
605 switch (GET_CODE (op))
611 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
612 && GET_CODE (XEXP (op, 1)) == CONST_INT);
/* Return 1 if OP is a hard floating-point register; reg_renumber being
   non-null means register allocation has happened.  */
619 fp_reg_operand (op, mode)
621 enum machine_mode mode ATTRIBUTE_UNUSED;
623 return reg_renumber && FP_REG_P (op);
628 /* Return truth value of whether OP can be used as an operand in a
629 three operand arithmetic insn that accepts registers of mode MODE
630 or 14-bit signed integers. */
632 arith_operand (op, mode)
634 enum machine_mode mode;
636 return (register_operand (op, mode)
637 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
640 /* Return truth value of whether OP can be used as an operand in a
641 three operand arithmetic insn that accepts registers of mode MODE
642 or 11-bit signed integers. */
644 arith11_operand (op, mode)
646 enum machine_mode mode;
648 return (register_operand (op, mode)
649 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
652 /* Return truth value of whether OP can be used as an operand in a
/* ...DImode add: 64-bit targets take 14-bit immediates (addi,*), while
   32-bit double-word adds are limited to 11 bits.  */
655 adddi3_operand (op, mode)
657 enum machine_mode mode;
659 return (register_operand (op, mode)
660 || (GET_CODE (op) == CONST_INT
661 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
664 /* A constant integer suitable for use in a PRE_MODIFY memory
/* ...reference: a negative 13-bit displacement up through small positive
   offsets (asymmetric range of the ldw,mb-style forms).  */
667 pre_cint_operand (op, mode)
669 enum machine_mode mode ATTRIBUTE_UNUSED;
671 return (GET_CODE (op) == CONST_INT
672 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
675 /* A constant integer suitable for use in a POST_MODIFY memory
/* ...reference: mirror image of the PRE_MODIFY range above.  */
678 post_cint_operand (op, mode)
680 enum machine_mode mode ATTRIBUTE_UNUSED;
682 return (GET_CODE (op) == CONST_INT
683 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
/* Return 1 if OP is a register or a CONST_DOUBLE usable as a
   double-word arithmetic immediate: the low word fits 14 bits and the
   high word is exactly its sign-extension.  */
687 arith_double_operand (op, mode)
689 enum machine_mode mode;
691 return (register_operand (op, mode)
692 || (GET_CODE (op) == CONST_DOUBLE
693 && GET_MODE (op) == mode
694 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
695 && ((CONST_DOUBLE_HIGH (op) >= 0)
696 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
699 /* Return truth value of whether OP is an integer which fits the
700 range constraining immediate operands in three-address insns, or
701 is an integer register. */
704 ireg_or_int5_operand (op, mode)
706 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Integer registers are hard regs 1..31 (%r0 is excluded: it reads
   as zero).  */
708 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
709 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
712 /* Return nonzero if OP is an integer register, else return zero. */
714 ireg_operand (op, mode)
716 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Hard general registers 1..31; %r0 is excluded.  */
718 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
721 /* Return truth value of whether OP is an integer which fits the
722 range constraining immediate operands in three-address insns. */
725 int5_operand (op, mode)
727 enum machine_mode mode ATTRIBUTE_UNUSED;
729 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
/* Return 1 if OP is a CONST_INT fitting an unsigned 5-bit field.  */
733 uint5_operand (op, mode)
735 enum machine_mode mode ATTRIBUTE_UNUSED;
737 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
/* Return 1 if OP is a CONST_INT fitting a signed 11-bit field.  */
741 int11_operand (op, mode)
743 enum machine_mode mode ATTRIBUTE_UNUSED;
745 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
/* Return 1 if OP is a constant fitting an unsigned 32-bit field.  On
   wide hosts a single CONST_INT range test suffices; on 32-bit hosts a
   CONST_DOUBLE with zero high word is also accepted.  */
749 uint32_operand (op, mode)
751 enum machine_mode mode ATTRIBUTE_UNUSED;
753 #if HOST_BITS_PER_WIDE_INT > 32
754 /* All allowed constants will fit a CONST_INT. */
755 return (GET_CODE (op) == CONST_INT
756 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
/* #else branch (marker elided): host words are only 32 bits wide, so
   any CONST_INT is acceptable.  */
758 return (GET_CODE (op) == CONST_INT
759 || (GET_CODE (op) == CONST_DOUBLE
760 && CONST_DOUBLE_HIGH (op) == 0));
/* Return 1 if OP is a register or a signed 5-bit immediate.  */
765 arith5_operand (op, mode)
767 enum machine_mode mode;
769 return register_operand (op, mode) || int5_operand (op, mode);
772 /* True iff zdepi can be used to generate this CONST_INT.
773 zdepi first sign extends a 5 bit signed number to a given field
774 length, then places this field anywhere in a zero. */
/* NOTE(review): the function-name line (presumably zdepi_cint_p) and the
   lsb_mask initialization appear elided from this chunk.  */
777 unsigned HOST_WIDE_INT x;
779 unsigned HOST_WIDE_INT lsb_mask, t;
781 /* This might not be obvious, but it's at least fast.
782 This function is critical; we don't have the time loops would take. */
784 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
785 /* Return true iff t is a power of two. */
786 return ((t & (t - 1)) == 0);
789 /* True iff depi or extru can be used to compute (reg & mask).
790 Accept bit pattern like these:
/* NOTE(review): the example bit patterns, the function-name line
   (presumably and_mask_p), and `mask = ~mask;` appear elided from this
   chunk.  Adding the lowest set bit and checking for a power of two
   verifies the (complemented) mask is one contiguous run of ones.  */
796 unsigned HOST_WIDE_INT mask;
799 mask += mask & -mask;
800 return (mask & (mask - 1)) == 0;
803 /* True iff depi or extru can be used to compute (reg & OP). */
805 and_operand (op, mode)
807 enum machine_mode mode;
809 return (register_operand (op, mode)
810 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
813 /* True iff depi can be used to compute (reg | MASK). */
/* NOTE(review): the function-name line (presumably ior_mask_p) appears
   elided.  A mask is depi-able when its set bits form one contiguous
   run: adding the lowest set bit must yield a power of two.  */
816 unsigned HOST_WIDE_INT mask;
818 mask += mask & -mask;
819 return (mask & (mask - 1)) == 0;
822 /* True iff depi can be used to compute (reg | OP). */
824 ior_operand (op, mode)
826 enum machine_mode mode ATTRIBUTE_UNUSED;
828 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
/* Return 1 if OP may be the shifted value of a left shift: a register
   or a constant acceptable to lhs_lshift_cint_operand.  */
832 lhs_lshift_operand (op, mode)
834 enum machine_mode mode;
836 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
839 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
840 Such values can be the left hand side x in (x << r), using the zvdepi
/* ...instruction.  */
843 lhs_lshift_cint_operand (op, mode)
845 enum machine_mode mode ATTRIBUTE_UNUSED;
847 unsigned HOST_WIDE_INT x;
848 if (GET_CODE (op) != CONST_INT)
/* Drop the low 4 bits (arbitrary xxxx); what remains must be a run of
   low-order ones: x & (x+1) == 0 checks exactly that.  */
850 x = INTVAL (op) >> 4;
851 return (x & (x + 1)) == 0;
/* Return 1 if OP is a register or any CONST_INT (no range limit).  */
855 arith32_operand (op, mode)
857 enum machine_mode mode;
859 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
/* Return 1 if OP is the pc rtx or a label reference — the two legal
   targets of an IF_THEN_ELSE branch arm.  */
863 pc_or_label_operand (op, mode)
865 enum machine_mode mode ATTRIBUTE_UNUSED;
867 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
870 /* Legitimize PIC addresses. If the address is already
871 position-independent, we return ORIG. Newly generated
872 position-independent addresses go to REG. If we need more
873 than one register, we lose. */
/* NOTE(review): this chunk appears to have many lines elided (return
   type, local declarations such as pic_ref/base, braces, and several
   statements); code kept verbatim.  */
876 legitimize_pic_address (orig, mode, reg)
878 enum machine_mode mode;
882 /* Labels need special handling. */
883 if (pic_label_operand (orig, mode))
885 /* We do not want to go through the movXX expanders here since that
886 would create recursion.
888 Nor do we really want to call a generator for a named pattern
889 since that requires multiple patterns if we want to support
892 So instead we just emit the raw set, which avoids the movXX
893 expanders completely. */
894 emit_insn (gen_rtx_SET (VOIDmode, reg, orig))
895 current_function_uses_pic_offset_table = 1;
/* Symbols are loaded through the DLT: %r19 + %hi, then a LO_SUM
   UNSPEC memory reference.  */
898 if (GET_CODE (orig) == SYMBOL_REF)
904 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
905 gen_rtx_HIGH (word_mode, orig)));
907 = gen_rtx_MEM (Pmode,
908 gen_rtx_LO_SUM (Pmode, reg,
909 gen_rtx_UNSPEC (Pmode,
913 current_function_uses_pic_offset_table = 1;
/* DLT entries are constant for the life of the program.  */
914 RTX_UNCHANGING_P (pic_ref) = 1;
915 emit_move_insn (reg, pic_ref);
918 else if (GET_CODE (orig) == CONST)
/* (const (plus pic_offset_table_rtx ...)) is already legitimate.  */
922 if (GET_CODE (orig, 0)) == PLUS
923 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
/* Legitimize base and offset separately, reusing REG only when the
   base did not consume it.  */
929 if (GET_CODE (XEXP (orig, 0)) == PLUS)
931 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
932 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
933 base == reg ? 0 : reg);
936 if (GET_CODE (orig) == CONST_INT)
938 if (INT_14_BITS (orig))
939 return plus_constant (base, INTVAL (orig));
940 orig = force_reg (Pmode, orig);
942 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
943 /* Likewise, should we set special REG_NOTEs here? */
948 /* Try machine-dependent ways of modifying an illegitimate address
949 to be legitimate. If we find one, return the new, valid address.
950 This macro is used in only one place: `memory_address' in explow.c.
952 OLDX is the address as it was before break_out_memory_refs was called.
953 In some cases it is useful to look at this to decide what needs to be done.
955 MODE and WIN are passed so that this macro can use
956 GO_IF_LEGITIMATE_ADDRESS.
958 It is always safe for this macro to do nothing. It exists to recognize
959 opportunities to optimize the output.
961 For the PA, transform:
963 memory(X + <large int>)
967 if (<large int> & mask) >= 16
968 Y = (<large int> & ~mask) + mask + 1 Round up.
970 Y = (<large int> & ~mask) Round down.
972 memory (Z + (<large int> - Y));
974 This is for CSE to find several similar references, and only use one Z.
976 X can either be a SYMBOL_REF or REG, but because combine can not
977 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
978 D will not fit in 14 bits.
980 MODE_FLOAT references allow displacements which fit in 5 bits, so use
983 MODE_INT references allow displacements which fit in 14 bits, so use
986 This relies on the fact that most mode MODE_FLOAT references will use FP
987 registers and most mode MODE_INT references will use integer registers.
988 (In the rare case of an FP register used in an integer MODE, we depend
989 on secondary reloads to clean things up.)
992 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
993 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
994 addressing modes to be used).
996 Put X and Z into registers. Then put the entire expression into
1000 hppa_legitimize_address (x, oldx, mode)
1001 rtx x, oldx ATTRIBUTE_UNUSED;
1002 enum machine_mode mode;
1007 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1009 /* Strip off CONST. */
1010 if (GET_CODE (x) == CONST)
1013 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1014 That should always be safe. */
1015 if (GET_CODE (x) == PLUS
1016 && GET_CODE (XEXP (x, 0)) == REG
1017 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1019 rtx reg = force_reg (Pmode, XEXP (x, 1));
1020 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1023 /* Note we must reject symbols which represent function addresses
1024 since the assembler/linker can't handle arithmetic on plabels. */
1025 if (GET_CODE (x) == PLUS
1026 && GET_CODE (XEXP (x, 1)) == CONST_INT
1027 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1028 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1029 || GET_CODE (XEXP (x, 0)) == REG))
1031 rtx int_part, ptr_reg;
1033 int offset = INTVAL (XEXP (x, 1));
1036 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1037 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1039 /* Choose which way to round the offset. Round up if we
1040 are >= halfway to the next boundary. */
1041 if ((offset & mask) >= ((mask + 1) / 2))
1042 newoffset = (offset & ~ mask) + mask + 1;
1044 newoffset = (offset & ~ mask);
1046 /* If the newoffset will not fit in 14 bits (ldo), then
1047 handling this would take 4 or 5 instructions (2 to load
1048 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1049 add the new offset and the SYMBOL_REF.) Combine can
1050 not handle 4->2 or 5->2 combinations, so do not create
1052 if (! VAL_14_BITS_P (newoffset)
1053 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1055 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1058 gen_rtx_HIGH (Pmode, const_part));
1061 gen_rtx_LO_SUM (Pmode,
1062 tmp_reg, const_part));
1066 if (! VAL_14_BITS_P (newoffset))
1067 int_part = force_reg (Pmode, GEN_INT (newoffset));
1069 int_part = GEN_INT (newoffset);
1071 ptr_reg = force_reg (Pmode,
1072 gen_rtx_PLUS (Pmode,
1073 force_reg (Pmode, XEXP (x, 0)),
1076 return plus_constant (ptr_reg, offset - newoffset);
1079 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1081 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1082 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1083 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1084 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1085 || GET_CODE (XEXP (x, 1)) == SUBREG)
1086 && GET_CODE (XEXP (x, 1)) != CONST)
1088 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1092 if (GET_CODE (reg1) != REG)
1093 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1095 reg2 = XEXP (XEXP (x, 0), 0);
1096 if (GET_CODE (reg2) != REG)
1097 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1099 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1100 gen_rtx_MULT (Pmode,
1106 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1108 Only do so for floating point modes since this is more speculative
1109 and we lose if it's an integer store. */
1110 if (GET_CODE (x) == PLUS
1111 && GET_CODE (XEXP (x, 0)) == PLUS
1112 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1113 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1114 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1115 && (mode == SFmode || mode == DFmode))
1118 /* First, try and figure out what to use as a base register. */
1119 rtx reg1, reg2, base, idx, orig_base;
1121 reg1 = XEXP (XEXP (x, 0), 1);
1126 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1127 then emit_move_sequence will turn on REG_POINTER so we'll know
1128 it's a base register below. */
1129 if (GET_CODE (reg1) != REG)
1130 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1132 if (GET_CODE (reg2) != REG)
1133 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1135 /* Figure out what the base and index are. */
1137 if (GET_CODE (reg1) == REG
1138 && REG_POINTER (reg1))
1141 orig_base = XEXP (XEXP (x, 0), 1);
1142 idx = gen_rtx_PLUS (Pmode,
1143 gen_rtx_MULT (Pmode,
1144 XEXP (XEXP (XEXP (x, 0), 0), 0),
1145 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1148 else if (GET_CODE (reg2) == REG
1149 && REG_POINTER (reg2))
1152 orig_base = XEXP (x, 1);
1159 /* If the index adds a large constant, try to scale the
1160 constant so that it can be loaded with only one insn. */
1161 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1162 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1163 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1164 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1166 /* Divide the CONST_INT by the scale factor, then add it to A. */
1167 int val = INTVAL (XEXP (idx, 1));
1169 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1170 reg1 = XEXP (XEXP (idx, 0), 0);
1171 if (GET_CODE (reg1) != REG)
1172 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1174 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1176 /* We can now generate a simple scaled indexed address. */
1179 (Pmode, gen_rtx_PLUS (Pmode,
1180 gen_rtx_MULT (Pmode, reg1,
1181 XEXP (XEXP (idx, 0), 1)),
1185 /* If B + C is still a valid base register, then add them. */
1186 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1187 && INTVAL (XEXP (idx, 1)) <= 4096
1188 && INTVAL (XEXP (idx, 1)) >= -4096)
1190 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1193 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1195 reg2 = XEXP (XEXP (idx, 0), 0);
1196 if (GET_CODE (reg2) != CONST_INT)
1197 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1199 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1200 gen_rtx_MULT (Pmode,
1206 /* Get the index into a register, then add the base + index and
1207 return a register holding the result. */
1209 /* First get A into a register. */
1210 reg1 = XEXP (XEXP (idx, 0), 0);
1211 if (GET_CODE (reg1) != REG)
1212 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1214 /* And get B into a register. */
1215 reg2 = XEXP (idx, 1);
1216 if (GET_CODE (reg2) != REG)
1217 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1219 reg1 = force_reg (Pmode,
1220 gen_rtx_PLUS (Pmode,
1221 gen_rtx_MULT (Pmode, reg1,
1222 XEXP (XEXP (idx, 0), 1)),
1225 /* Add the result to our base register and return. */
1226 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1230 /* Uh-oh. We might have an address for x[n-100000]. This needs
1231 special handling to avoid creating an indexed memory address
1232 with x-100000 as the base.
1234 If the constant part is small enough, then it's still safe because
1235 there is a guard page at the beginning and end of the data segment.
1237 Scaled references are common enough that we want to try and rearrange the
1238 terms so that we can use indexing for these addresses too. Only
1239 do the optimization for floatint point modes. */
1241 if (GET_CODE (x) == PLUS
1242 && symbolic_expression_p (XEXP (x, 1)))
1244 /* Ugly. We modify things here so that the address offset specified
1245 by the index expression is computed first, then added to x to form
1246 the entire address. */
1248 rtx regx1, regx2, regy1, regy2, y;
1250 /* Strip off any CONST. */
1252 if (GET_CODE (y) == CONST)
1255 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1257 /* See if this looks like
1258 (plus (mult (reg) (shadd_const))
1259 (const (plus (symbol_ref) (const_int))))
1261 Where const_int is small. In that case the const
1262 expression is a valid pointer for indexing.
1264 If const_int is big, but can be divided evenly by shadd_const
1265 and added to (reg). This allows more scaled indexed addresses. */
1266 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1267 && GET_CODE (XEXP (x, 0)) == MULT
1268 && GET_CODE (XEXP (y, 1)) == CONST_INT
1269 && INTVAL (XEXP (y, 1)) >= -4096
1270 && INTVAL (XEXP (y, 1)) <= 4095
1271 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1272 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1274 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1278 if (GET_CODE (reg1) != REG)
1279 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1281 reg2 = XEXP (XEXP (x, 0), 0);
1282 if (GET_CODE (reg2) != REG)
1283 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1285 return force_reg (Pmode,
1286 gen_rtx_PLUS (Pmode,
1287 gen_rtx_MULT (Pmode,
1292 else if ((mode == DFmode || mode == SFmode)
1293 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1294 && GET_CODE (XEXP (x, 0)) == MULT
1295 && GET_CODE (XEXP (y, 1)) == CONST_INT
1296 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1297 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1298 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1301 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1302 / INTVAL (XEXP (XEXP (x, 0), 1))));
1303 regx2 = XEXP (XEXP (x, 0), 0);
1304 if (GET_CODE (regx2) != REG)
1305 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1306 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1310 gen_rtx_PLUS (Pmode,
1311 gen_rtx_MULT (Pmode, regx2,
1312 XEXP (XEXP (x, 0), 1)),
1313 force_reg (Pmode, XEXP (y, 0))));
1315 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1316 && INTVAL (XEXP (y, 1)) >= -4096
1317 && INTVAL (XEXP (y, 1)) <= 4095)
1319 /* This is safe because of the guard page at the
1320 beginning and end of the data space. Just
1321 return the original address. */
1326 /* Doesn't look like one we can optimize. */
1327 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1328 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1329 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1330 regx1 = force_reg (Pmode,
1331 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1333 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1341 /* For the HPPA, REG and REG+CONST is cost 0
1342 and addresses involving symbolic constants are cost 2.
1344 PIC addresses are very expensive.
1346 It is no coincidence that this has the same structure
1347 as GO_IF_LEGITIMATE_ADDRESS. */
/* NOTE(review): gaps in the embedded line numbers (1348-1349, 1351-1352)
   show that this listing dropped the return type and parameter declaration
   of hppa_address_cost; only the header and the switch opener survive.  */
1350 hppa_address_cost (X)
/* NOTE(review): the switch body (the per-rtx-code cost returns described
   in the comment above) falls in the elided lines after 1353 — confirm
   against the full source before editing.  */
1353 switch (GET_CODE (X))
1366 /* Compute a (partial) cost for rtx X. Return true if the complete
1367 cost has been computed, and false if subexpressions should be
1368 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): K&R-style definition; the `rtx x` and `int *total`
   parameter declarations sit in elided lines (1372, 1374).  */
1371 hppa_rtx_costs (x, code, outer_code, total)
1373 int code, outer_code;
/* CONST_INT case: a zero or 14-bit-immediate constant is cheap; the
   actual *total assignments for these arms are in elided lines.  */
1379 if (INTVAL (x) == 0)
1381 else if (INT_14_BITS (x))
/* CONST_DOUBLE case (presumably — the case label is elided): FP zero
   constants are cheap except when being SET directly.  */
1398 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1399 && outer_code != SET)
/* MULT case (case label elided): 3 insns for FP multiply, 8 for integer
   multiply on PA 1.1 with FP regs enabled, 20 for the millicode call.  */
1406 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1407 *total = COSTS_N_INSNS (3);
1408 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1409 *total = COSTS_N_INSNS (8);
1411 *total = COSTS_N_INSNS (20);
/* DIV/UDIV/MOD/UMOD cases (labels elided): FP divide costs 14 insns,
   the integer millicode divide costs 60.  */
1415 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1417 *total = COSTS_N_INSNS (14);
1425 *total = COSTS_N_INSNS (60);
1428 case PLUS: /* this includes shNadd insns */
1430 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1431 *total = COSTS_N_INSNS (3);
1433 *total = COSTS_N_INSNS (1);
/* ASHIFT/ASHIFTRT/LSHIFTRT cases (labels elided): single-insn shifts.  */
1439 *total = COSTS_N_INSNS (1);
1447 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1448 new rtx with the correct mode. */
/* NOTE(review): used by emit_move_sequence below to coerce scratch
   registers into word_mode.  The elided lines presumably hold
   `return orig;` after the mode test and an abort for hard registers
   (line 1457 guards REGNO >= FIRST_PSEUDO_REGISTER before the final
   gen_rtx_REG) — TODO confirm against the full source.  */
1450 force_mode (mode, orig)
1451 enum machine_mode mode;
1454 if (mode == GET_MODE (orig))
1457 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1460 return gen_rtx_REG (mode, REGNO (orig));
1463 /* Emit insns to move operands[1] into operands[0].
1465 Return 1 if we have written out everything that needs to be done to
1466 do the move. Otherwise, return 0 and the caller will emit the move
1469 Note SCRATCH_REG may not be in the proper mode depending on how it
1470 will be used. This routine is responsible for creating a new copy
1471 of SCRATCH_REG in the proper mode. */
/* NOTE(review): this listing elided many lines of the function (the
   embedded numbering jumps, e.g. 1481-1483, 1535-1536, 1676-1677);
   braces, else-arms and some call arguments are missing below.  The
   visible control flow is a cascade of special cases: reload fixups,
   FP secondary reloads, SAR reloads, plain register/memory stores,
   symbolic/PIC sources, and large constants.  */
1474 emit_move_sequence (operands, mode, scratch_reg)
1476 enum machine_mode mode;
1479 register rtx operand0 = operands[0];
1480 register rtx operand1 = operands[1];
/* During reload, replace pseudos (and SUBREGs of pseudos) that have been
   assigned stack slots with their equivalent memory locations.  */
1484 && reload_in_progress && GET_CODE (operand0) == REG
1485 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1486 operand0 = reg_equiv_mem[REGNO (operand0)];
1487 else if (scratch_reg
1488 && reload_in_progress && GET_CODE (operand0) == SUBREG
1489 && GET_CODE (SUBREG_REG (operand0)) == REG
1490 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1492 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1493 the code which tracks sets/uses for delete_output_reload. */
1494 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1495 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1496 SUBREG_BYTE (operand0));
1497 operand0 = alter_subreg (&temp);
/* Same pseudo-to-stack-slot substitution for the source operand.  */
1501 && reload_in_progress && GET_CODE (operand1) == REG
1502 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1503 operand1 = reg_equiv_mem[REGNO (operand1)];
1504 else if (scratch_reg
1505 && reload_in_progress && GET_CODE (operand1) == SUBREG
1506 && GET_CODE (SUBREG_REG (operand1)) == REG
1507 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1509 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1510 the code which tracks sets/uses for delete_output_reload. */
1511 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1512 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1513 SUBREG_BYTE (operand1));
1514 operand1 = alter_subreg (&temp);
/* Substitute any reload replacements recorded for the MEM addresses.  */
1517 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1518 && ((tem = find_replacement (&XEXP (operand0, 0)))
1519 != XEXP (operand0, 0)))
1520 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1521 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1522 && ((tem = find_replacement (&XEXP (operand1, 0)))
1523 != XEXP (operand1, 0)))
1524 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1526 /* Handle secondary reloads for loads/stores of FP registers from
1527 REG+D addresses where D does not fit in 5 bits, including
1528 (subreg (mem (addr))) cases. */
1529 if (fp_reg_operand (operand0, mode)
1530 && ((GET_CODE (operand1) == MEM
1531 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1532 || ((GET_CODE (operand1) == SUBREG
1533 && GET_CODE (XEXP (operand1, 0)) == MEM
1534 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1537 if (GET_CODE (operand1) == SUBREG)
1538 operand1 = XEXP (operand1, 0);
1540 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1541 it in WORD_MODE regardless of what mode it was originally given
1543 scratch_reg = force_mode (word_mode, scratch_reg);
1545 /* D might not fit in 14 bits either; for such cases load D into
1547 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1549 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1550 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1552 XEXP (XEXP (operand1, 0), 0),
/* else arm (elided brace): the address itself fits, just copy it.  */
1556 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1557 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1558 gen_rtx_MEM (mode, scratch_reg)));
/* Mirror case: storing an FP register to an out-of-range REG+D address.  */
1561 else if (fp_reg_operand (operand1, mode)
1562 && ((GET_CODE (operand0) == MEM
1563 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1564 || ((GET_CODE (operand0) == SUBREG)
1565 && GET_CODE (XEXP (operand0, 0)) == MEM
1566 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1569 if (GET_CODE (operand0) == SUBREG)
1570 operand0 = XEXP (operand0, 0);
1572 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1573 it in WORD_MODE regardless of what mode it was originally given
1575 scratch_reg = force_mode (word_mode, scratch_reg);
1577 /* D might not fit in 14 bits either; for such cases load D into
1579 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1581 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1582 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1585 XEXP (XEXP (operand0, 0),
1590 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1591 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1595 /* Handle secondary reloads for loads of FP registers from constant
1596 expressions by forcing the constant into memory.
1598 use scratch_reg to hold the address of the memory location.
1600 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1601 NO_REGS when presented with a const_int and a register class
1602 containing only FP registers. Doing so unfortunately creates
1603 more problems than it solves. Fix this for 2.5. */
1604 else if (fp_reg_operand (operand0, mode)
1605 && CONSTANT_P (operand1)
1610 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1611 it in WORD_MODE regardless of what mode it was originally given
1613 scratch_reg = force_mode (word_mode, scratch_reg);
1615 /* Force the constant into memory and put the address of the
1616 memory location into scratch_reg. */
1617 xoperands[0] = scratch_reg;
1618 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
/* Recursive call to move the constant-pool address into scratch_reg.  */
1619 emit_move_sequence (xoperands, Pmode, 0);
1621 /* Now load the destination register. */
1622 emit_insn (gen_rtx_SET (mode, operand0,
1623 gen_rtx_MEM (mode, scratch_reg)));
1626 /* Handle secondary reloads for SAR. These occur when trying to load
1627 the SAR from memory, FP register, or with a constant. */
1628 else if (GET_CODE (operand0) == REG
1629 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1630 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1631 && (GET_CODE (operand1) == MEM
1632 || GET_CODE (operand1) == CONST_INT
1633 || (GET_CODE (operand1) == REG
1634 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1637 /* D might not fit in 14 bits either; for such cases load D into
1639 if (GET_CODE (operand1) == MEM
1640 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1642 /* We are reloading the address into the scratch register, so we
1643 want to make sure the scratch register is a full register. */
1644 scratch_reg = force_mode (word_mode, scratch_reg);
1646 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1647 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1650 XEXP (XEXP (operand1, 0),
1654 /* Now we are going to load the scratch register from memory,
1655 we want to load it in the same width as the original MEM,
1656 which must be the same as the width of the ultimate destination,
1658 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1660 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1665 /* We want to load the scratch register using the same mode as
1666 the ultimate destination. */
1667 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1669 emit_move_insn (scratch_reg, operand1);
1672 /* And emit the insn to set the ultimate destination. We know that
1673 the scratch register has the same mode as the destination at this
1675 emit_move_insn (operand0, scratch_reg);
1678 /* Handle most common case: storing into a register. */
1679 else if (register_operand (operand0, mode))
1681 if (register_operand (operand1, mode)
1682 || (GET_CODE (operand1) == CONST_INT
1683 && cint_ok_for_move (INTVAL (operand1)))
1684 || (operand1 == CONST0_RTX (mode))
1685 || (GET_CODE (operand1) == HIGH
1686 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1687 /* Only `general_operands' can come here, so MEM is ok. */
1688 || GET_CODE (operand1) == MEM)
1690 /* Run this case quickly. */
1691 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1695 else if (GET_CODE (operand0) == MEM)
/* Storing FP zero to memory: go through an FP register copy so the
   store uses the FP unit (only valid before/outside reload).  */
1697 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1698 && !(reload_in_progress || reload_completed))
1700 rtx temp = gen_reg_rtx (DFmode);
1702 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1703 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1706 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1708 /* Run this case quickly. */
1709 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1712 if (! (reload_in_progress || reload_completed))
1714 operands[0] = validize_mem (operand0);
1715 operands[1] = operand1 = force_reg (mode, operand1);
1719 /* Simplify the source if we need to.
1720 Note we do have to handle function labels here, even though we do
1721 not consider them legitimate constants. Loop optimizations can
1722 call the emit_move_xxx with one as a source. */
1723 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1724 || function_label_operand (operand1, mode)
1725 || (GET_CODE (operand1) == HIGH
1726 && symbolic_operand (XEXP (operand1, 0), mode)))
1730 if (GET_CODE (operand1) == HIGH)
1733 operand1 = XEXP (operand1, 0);
1735 if (symbolic_operand (operand1, mode))
1737 /* Argh. The assembler and linker can't handle arithmetic
1740 So we force the plabel into memory, load operand0 from
1741 the memory location, then add in the constant part. */
1742 if ((GET_CODE (operand1) == CONST
1743 && GET_CODE (XEXP (operand1, 0)) == PLUS
1744 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1745 || function_label_operand (operand1, mode))
1747 rtx temp, const_part;
1749 /* Figure out what (if any) scratch register to use. */
1750 if (reload_in_progress || reload_completed)
1752 scratch_reg = scratch_reg ? scratch_reg : operand0;
1753 /* SCRATCH_REG will hold an address and maybe the actual
1754 data. We want it in WORD_MODE regardless of what mode it
1755 was originally given to us. */
1756 scratch_reg = force_mode (word_mode, scratch_reg);
1759 scratch_reg = gen_reg_rtx (Pmode);
1761 if (GET_CODE (operand1) == CONST)
1763 /* Save away the constant part of the expression. */
1764 const_part = XEXP (XEXP (operand1, 0), 1);
1765 if (GET_CODE (const_part) != CONST_INT)
1768 /* Force the function label into memory. */
1769 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1773 /* No constant part. */
1774 const_part = NULL_RTX;
1776 /* Force the function label into memory. */
1777 temp = force_const_mem (mode, operand1);
1781 /* Get the address of the memory location. PIC-ify it if
1783 temp = XEXP (temp, 0);
1785 temp = legitimize_pic_address (temp, mode, scratch_reg);
1787 /* Put the address of the memory location into our destination
1790 emit_move_sequence (operands, mode, scratch_reg);
1792 /* Now load from the memory location into our destination
1794 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1795 emit_move_sequence (operands, mode, scratch_reg);
1797 /* And add back in the constant part. */
1798 if (const_part != NULL_RTX)
1799 expand_inc (operand0, const_part);
/* PIC symbolic source (the flag_pic test sits in elided lines).  */
1808 if (reload_in_progress || reload_completed)
1810 temp = scratch_reg ? scratch_reg : operand0;
1811 /* TEMP will hold an address and maybe the actual
1812 data. We want it in WORD_MODE regardless of what mode it
1813 was originally given to us. */
1814 temp = force_mode (word_mode, temp);
1817 temp = gen_reg_rtx (Pmode);
1819 /* (const (plus (symbol) (const_int))) must be forced to
1820 memory during/after reload if the const_int will not fit
1822 if (GET_CODE (operand1) == CONST
1823 && GET_CODE (XEXP (operand1, 0)) == PLUS
1824 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1825 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1826 && (reload_completed || reload_in_progress)
1829 operands[1] = force_const_mem (mode, operand1);
1830 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1832 emit_move_sequence (operands, mode, temp);
1836 operands[1] = legitimize_pic_address (operand1, mode, temp);
1837 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1840 /* On the HPPA, references to data space are supposed to use dp,
1841 register 27, but showing it in the RTL inhibits various cse
1842 and loop optimizations. */
1847 if (reload_in_progress || reload_completed)
1849 temp = scratch_reg ? scratch_reg : operand0;
1850 /* TEMP will hold an address and maybe the actual
1851 data. We want it in WORD_MODE regardless of what mode it
1852 was originally given to us. */
1853 temp = force_mode (word_mode, temp);
1856 temp = gen_reg_rtx (mode);
1858 /* Loading a SYMBOL_REF into a register makes that register
1859 safe to be used as the base in an indexed address.
1861 Don't mark hard registers though. That loses. */
1862 if (GET_CODE (operand0) == REG
1863 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1864 REG_POINTER (operand0) = 1;
1865 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1866 REG_POINTER (temp) = 1;
1868 set = gen_rtx_SET (mode, operand0, temp);
1870 set = gen_rtx_SET (VOIDmode,
/* HIGH/LO_SUM pair: load the high part into TEMP, then LO_SUM it
   into the destination.  */
1872 gen_rtx_LO_SUM (mode, temp, operand1));
1874 emit_insn (gen_rtx_SET (VOIDmode,
1876 gen_rtx_HIGH (mode, operand1)));
/* Constants that cannot be moved with a single insn.  */
1882 else if (GET_CODE (operand1) != CONST_INT
1883 || ! cint_ok_for_move (INTVAL (operand1)))
1885 rtx extend = NULL_RTX;
1888 if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1889 && HOST_BITS_PER_WIDE_INT > 32
1890 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1892 HOST_WIDE_INT val = INTVAL (operand1);
1895 /* Extract the low order 32 bits of the value and sign extend.
1896 If the new value is the same as the original value, we can
1897 can use the original value as-is. If the new value is
1898 different, we use it and insert the most-significant 32-bits
1899 of the original value into the final result. */
1900 nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1901 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1904 #if HOST_BITS_PER_WIDE_INT > 32
1905 extend = GEN_INT (val >> 32);
1907 operand1 = GEN_INT (nval);
1911 if (reload_in_progress || reload_completed)
1914 temp = gen_reg_rtx (mode);
1916 /* We don't directly split DImode constants on 32-bit targets
1917 because PLUS uses an 11-bit immediate and the insn sequence
1918 generated is not as efficient as the one using HIGH/LO_SUM. */
1919 if (GET_CODE (operand1) == CONST_INT
1920 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
1922 /* Directly break constant into high and low parts. This
1923 provides better optimization opportunities because various
1924 passes recognize constants split with PLUS but not LO_SUM.
1925 We use a 14-bit signed low part except when the addition
1926 of 0x4000 to the high part might change the sign of the
1928 HOST_WIDE_INT value = INTVAL (operand1);
1929 HOST_WIDE_INT low = value & 0x3fff;
1930 HOST_WIDE_INT high = value & ~ 0x3fff;
/* Sign-of-high-part correction (the adjustment code is elided).  */
1934 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1942 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1943 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1947 emit_insn (gen_rtx_SET (VOIDmode, temp,
1948 gen_rtx_HIGH (mode, operand1)));
1949 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1952 emit_move_insn (operands[0], operands[1]);
/* On 64-bit, patch the upper 32 bits in with an insv insn.  */
1954 if (extend != NULL_RTX)
1955 emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1961 /* Now have insn-emit do whatever it normally does. */
1965 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1966 it will need a link/runtime reloc). */
/* NOTE(review): the function header, local declarations, and several
   case labels (ADDR_EXPR itself, presumably PLUS/MINUS for line 1981,
   NOP/CONVERT for 1987, CONSTRUCTOR for 1994) fall in elided lines.
   The visible body recurses over tree operands, OR-ing the results.  */
1974 switch (TREE_CODE (exp))
1981 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1982 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1987 case NON_LVALUE_EXPR:
1988 reloc = reloc_needed (TREE_OPERAND (exp, 0));
/* CONSTRUCTOR: scan every element value in the constructor list.  */
1994 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1995 if (TREE_VALUE (link) != 0)
1996 reloc |= reloc_needed (TREE_VALUE (link));
2009 /* Does operand (which is a symbolic_operand) live in text space?
2010 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
/* NOTE(review): the return type, parameter declaration for OPERAND,
   and the branch condition separating the two SYMBOL_REF tests
   (presumably a flag_pic check, given the inverted CONSTANT_POOL_ADDRESS_P
   logic) are in elided lines — confirm before editing.  */
2014 read_only_operand (operand, mode)
2016 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a CONST wrapper to get at the underlying SYMBOL_REF.  */
2018 if (GET_CODE (operand) == CONST)
2019 operand = XEXP (XEXP (operand, 0), 0);
2022 if (GET_CODE (operand) == SYMBOL_REF)
2023 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2027 if (GET_CODE (operand) == SYMBOL_REF)
2028 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2034 /* Return the best assembler insn template
2035 for moving operands[1] into operands[0] as a fullword. */
/* NOTE(review): K&R definition; return type (const char *, presumably)
   and the `rtx *operands` declaration are in elided lines, as is the
   "ldw%M1 %1,%0" return for the MEM-source case after line 2044.  */
2037 singlemove_string (operands)
2040 HOST_WIDE_INT intval;
2042 if (GET_CODE (operands[0]) == MEM)
2043 return "stw %r1,%0";
2044 if (GET_CODE (operands[1]) == MEM)
2046 if (GET_CODE (operands[1]) == CONST_DOUBLE)
/* Only SFmode CONST_DOUBLEs are expected here (abort in elided lines).  */
2051 if (GET_MODE (operands[1]) != SFmode)
2054 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2056 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2057 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2059 operands[1] = GEN_INT (i);
2060 /* Fall through to CONST_INT case. */
2062 if (GET_CODE (operands[1]) == CONST_INT)
2064 intval = INTVAL (operands[1]);
/* Pick the cheapest immediate-load template: ldi/ldo for 14-bit values
   (template elided), ldil for values with clear low bits, zdepi/depwi
   for depositable bit patterns, else the two-insn ldil+ldo sequence.  */
2066 if (VAL_14_BITS_P (intval))
2068 else if ((intval & 0x7ff) == 0)
2069 return "ldil L'%1,%0";
2070 else if (zdepi_cint_p (intval))
2071 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2073 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2075 return "copy %1,%0";
2079 /* Compute position (in OP[1]) and width (in OP[2])
2080 useful for copying IMM to a register using the zdepi
2081 instructions. Store the immediate value to insert in OP[0]. */
/* NOTE(review): the loop bodies (the bit test and break) and the final
   three op[] assignments are in elided lines; the 64-bit sibling
   compute_zdepdi_operands below shows the intended structure.  */
2083 compute_zdepwi_operands (imm, op)
2084 unsigned HOST_WIDE_INT imm;
2089 /* Find the least significant set bit in IMM. */
2090 for (lsb = 0; lsb < 32; lsb++)
2097 /* Choose variants based on *sign* of the 5-bit field. */
2098 if ((imm & 0x10) == 0)
2099 len = (lsb <= 28) ? 4 : 32 - lsb;
2102 /* Find the width of the bitstring in IMM. */
2103 for (len = 5; len < 32; len++)
2105 if ((imm & (1 << len)) == 0)
2109 /* Sign extend IMM as a 5-bit value. */
2110 imm = (imm & 0xf) - 0x10;
2118 /* Compute position (in OP[1]) and width (in OP[2])
2119 useful for copying IMM to a register using the depdi,z
2120 instructions. Store the immediate value to insert in OP[0]. */
/* NOTE(review): 64-bit counterpart of compute_zdepwi_operands above;
   loop bodies and the final op[] assignments are in elided lines.
   Note the explicit unsigned cast in the width loop's shift (line 2145),
   avoiding the UB the 32-bit variant's `1 << len` would have for
   len >= 31 — worth harmonizing when the full source is available.  */
2122 compute_zdepdi_operands (imm, op)
2123 unsigned HOST_WIDE_INT imm;
2126 HOST_WIDE_INT lsb, len;
2128 /* Find the least significant set bit in IMM. */
2129 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2136 /* Choose variants based on *sign* of the 5-bit field. */
2137 if ((imm & 0x10) == 0)
2138 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2139 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2142 /* Find the width of the bitstring in IMM. */
2143 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2145 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2149 /* Sign extend IMM as a 5-bit value. */
2150 imm = (imm & 0xf) - 0x10;
2158 /* Output assembler code to perform a doubleword move insn
2159 with operands OPERANDS. */
/* NOTE(review): returns an assembler template string.  The embedded line
   numbers jump throughout (e.g. 2175->2180, 2225->2230), so abort calls,
   else-arms and brace lines are elided below.  */
2162 output_move_double (operands)
2165 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2167 rtx addreg0 = 0, addreg1 = 0;
2169 /* First classify both operands. */
2171 if (REG_P (operands[0]))
2173 else if (offsettable_memref_p (operands[0]))
2175 else if (GET_CODE (operands[0]) == MEM)
2180 if (REG_P (operands[1]))
2182 else if (CONSTANT_P (operands[1]))
2184 else if (offsettable_memref_p (operands[1]))
2186 else if (GET_CODE (operands[1]) == MEM)
2191 /* Check for the cases that the operand constraints are not
2192 supposed to allow to happen. Abort if we get one,
2193 because generating code for these cases is painful. */
2195 if (optype0 != REGOP && optype1 != REGOP)
2198 /* Handle auto decrementing and incrementing loads and stores
2199 specifically, since the structure of the function doesn't work
2200 for them without major modification. Do it better when we learn
2201 this port about the general inc/dec addressing of PA.
2202 (This was written by tege. Chide him if it doesn't work.) */
2204 if (optype0 == MEMOP)
2206 /* We have to output the address syntax ourselves, since print_operand
2207 doesn't deal with the addresses we want to use. Fix this later. */
2209 rtx addr = XEXP (operands[0], 0);
2210 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2212 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2214 operands[0] = XEXP (addr, 0);
2215 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2218 if (!reg_overlap_mentioned_p (high_reg, addr))
2220 /* No overlap between high target register and address
2221 register. (We do this in a non-obvious way to
2222 save a register file writeback) */
2223 if (GET_CODE (addr) == POST_INC)
2224 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2225 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2230 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2232 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2234 operands[0] = XEXP (addr, 0);
2235 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2238 if (!reg_overlap_mentioned_p (high_reg, addr))
2240 /* No overlap between high target register and address
2241 register. (We do this in a non-obvious way to
2242 save a register file writeback) */
2243 if (GET_CODE (addr) == PRE_INC)
2244 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2245 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2251 if (optype1 == MEMOP)
2253 /* We have to output the address syntax ourselves, since print_operand
2254 doesn't deal with the addresses we want to use. Fix this later. */
2256 rtx addr = XEXP (operands[1], 0);
2257 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2259 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2261 operands[1] = XEXP (addr, 0);
2262 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2265 if (!reg_overlap_mentioned_p (high_reg, addr))
2267 /* No overlap between high target register and address
2268 register. (We do this in a non-obvious way to
2269 save a register file writeback) */
2270 if (GET_CODE (addr) == POST_INC)
2271 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2272 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2276 /* This is an undefined situation. We should load into the
2277 address register *and* update that register. Probably
2278 we don't need to handle this at all. */
2279 if (GET_CODE (addr) == POST_INC)
2280 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2281 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2284 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2286 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2288 operands[1] = XEXP (addr, 0);
2289 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2292 if (!reg_overlap_mentioned_p (high_reg, addr))
2294 /* No overlap between high target register and address
2295 register. (We do this in a non-obvious way to
2296 save a register file writeback) */
2297 if (GET_CODE (addr) == PRE_INC)
2298 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2299 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2303 /* This is an undefined situation. We should load into the
2304 address register *and* update that register. Probably
2305 we don't need to handle this at all. */
2306 if (GET_CODE (addr) == PRE_INC)
2307 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2308 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
/* Scaled-index source address: compute the address with shNadd into
   whichever half of the destination is safe, then load both words.  */
2311 else if (GET_CODE (addr) == PLUS
2312 && GET_CODE (XEXP (addr, 0)) == MULT)
2314 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2316 if (!reg_overlap_mentioned_p (high_reg, addr))
2320 xoperands[0] = high_reg;
2321 xoperands[1] = XEXP (addr, 1);
2322 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2323 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2324 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2326 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2332 xoperands[0] = high_reg;
2333 xoperands[1] = XEXP (addr, 1);
2334 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2335 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2336 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2338 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2343 /* If an operand is an unoffsettable memory ref, find a register
2344 we can increment temporarily to make it refer to the second word. */
2346 if (optype0 == MEMOP)
2347 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2349 if (optype1 == MEMOP)
2350 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2352 /* Ok, we can do one word at a time.
2353 Normally we do the low-numbered word first.
2355 In either case, set up in LATEHALF the operands to use
2356 for the high-numbered word and in some cases alter the
2357 operands in OPERANDS to be suitable for the low-numbered word. */
2359 if (optype0 == REGOP)
2360 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2361 else if (optype0 == OFFSOP)
2362 latehalf[0] = adjust_address (operands[0], SImode, 4);
2364 latehalf[0] = operands[0];
2366 if (optype1 == REGOP)
2367 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2368 else if (optype1 == OFFSOP)
2369 latehalf[1] = adjust_address (operands[1], SImode, 4);
2370 else if (optype1 == CNSTOP)
2371 split_double (operands[1], &operands[1], &latehalf[1]);
2373 latehalf[1] = operands[1];
2375 /* If the first move would clobber the source of the second one,
2376 do them in the other order.
2378 This can happen in two cases:
2380 mem -> register where the first half of the destination register
2381 is the same register used in the memory's address. Reload
2382 can create such insns.
2384 mem in this case will be either register indirect or register
2385 indirect plus a valid offset.
2387 register -> register move where REGNO(dst) == REGNO(src + 1)
2388 someone (Tim/Tege?) claimed this can happen for parameter loads.
2390 Handle mem -> register case first. */
2391 if (optype0 == REGOP
2392 && (optype1 == MEMOP || optype1 == OFFSOP)
2393 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2396 /* Do the late half first. */
2398 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2399 output_asm_insn (singlemove_string (latehalf), latehalf);
/* Then undo the increment and do the early half (lines elided).  */
2403 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2404 return singlemove_string (operands);
2407 /* Now handle register -> register case. */
2408 if (optype0 == REGOP && optype1 == REGOP
2409 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2411 output_asm_insn (singlemove_string (latehalf), latehalf);
2412 return singlemove_string (operands);
2415 /* Normal case: do the two words, low-numbered first. */
2417 output_asm_insn (singlemove_string (operands), operands);
2419 /* Make any unoffsettable addresses point at high-numbered word. */
2421 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2423 output_asm_insn ("ldo 4(%0),%0", &addreg1);
/* Do the high-numbered word.  */
2426 output_asm_insn (singlemove_string (latehalf), latehalf);
2428 /* Undo the adds we just did. */
2430 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2432 output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Output the assembler template for a doubleword FP move.  Dispatches on
   whether each operand is an FP register, a zero constant, or memory.
   NOTE(review): the function's leading comment, return type, `rtx *operands`
   declaration, and the final abort/return are in elided lines.  */
2438 output_fp_move_double (operands)
2441 if (FP_REG_P (operands[0]))
2443 if (FP_REG_P (operands[1])
2444 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2445 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2447 output_asm_insn ("fldd%F1 %1,%0", operands);
2449 else if (FP_REG_P (operands[1]))
2451 output_asm_insn ("fstd%F0 %1,%0", operands);
2453 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2455 if (GET_CODE (operands[0]) == REG)
/* Zero a general-register pair by copying %r0 into both halves.  */
2458 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2459 xoperands[0] = operands[0];
2460 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2462 /* This is a pain. You have to be prepared to deal with an
2463 arbitrary address here including pre/post increment/decrement.
2465 so avoid this in the MD. */
2473 /* Return a REG that occurs in ADDR with coefficient 1.
2474    ADDR can be effectively incremented by incrementing REG.  */
/* Walk down a tree of PLUS expressions: at each level, follow the
   operand that is an explicit REG, otherwise step away from a
   constant operand.  The loop narrows ADDR until it is no longer
   a PLUS; the final check expects a bare REG to remain.  */
2477 find_addr_reg (addr)
2480   while (GET_CODE (addr) == PLUS)
2482       if (GET_CODE (XEXP (addr, 0)) == REG)
2483 	addr = XEXP (addr, 0);
2484       else if (GET_CODE (XEXP (addr, 1)) == REG)
2485 	addr = XEXP (addr, 1);
2486       else if (CONSTANT_P (XEXP (addr, 0)))
2487 	addr = XEXP (addr, 1);
2488       else if (CONSTANT_P (XEXP (addr, 1)))
2489 	addr = XEXP (addr, 0);
/* ADDR should now be the addressing register itself.  */
2493   if (GET_CODE (addr) == REG)
2498 /* Emit code to perform a block move.
2500    OPERANDS[0] is the destination pointer as a REG, clobbered.
2501    OPERANDS[1] is the source pointer as a REG, clobbered.
2502    OPERANDS[2] is a register for temporary storage.
2503    OPERANDS[4] is the size as a CONST_INT
2504    OPERANDS[3] is a register for temporary storage.
2505    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2506    OPERANDS[6] is another temporary register.  */
/* NOTE(review): decimated excerpt -- the alignment dispatch (switch/if on
   ALIGN) and several braces are missing between the sections below.  */
2509 output_block_move (operands, size_is_constant)
2511      int size_is_constant ATTRIBUTE_UNUSED;
2513   int align = INTVAL (operands[5]);
2514   unsigned long n_bytes = INTVAL (operands[4]);
2516   /* We can't move more than four bytes at a time because the PA
2517      has no longer integer move insns.  (Could use fp mem ops?)  */
2521   /* Note that we know each loop below will execute at least twice
2522      (else we would have open-coded the copy).  */
/* --- Word-aligned path: copy 8 bytes per iteration with two
   load/store pairs, counting down in operands[2].  The addib
   branches back -12 bytes, i.e. over the preceding three insns.  */
2526       /* Pre-adjust the loop counter.  */
2527       operands[4] = GEN_INT (n_bytes - 8);
2528       output_asm_insn ("ldi %4,%2", operands);
2531       output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2532       output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2533       output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2534       output_asm_insn ("addib,>= -8,%2,.-12", operands);
2535       output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2537       /* Handle the residual.  There could be up to 7 bytes of
2538 	 residual to copy!  */
2539       if (n_bytes % 8 != 0)
2541 	  operands[4] = GEN_INT (n_bytes % 4);
2542 	  if (n_bytes % 8 >= 4)
2543 	    output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2544 	  if (n_bytes % 4 != 0)
2545 	    output_asm_insn ("ldw 0(%1),%6", operands);
2546 	  if (n_bytes % 8 >= 4)
2547 	    output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2548 	  if (n_bytes % 4 != 0)
2549 	    output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
/* --- Halfword-aligned path: copy 4 bytes per iteration.  */
2554       /* Pre-adjust the loop counter.  */
2555       operands[4] = GEN_INT (n_bytes - 4);
2556       output_asm_insn ("ldi %4,%2", operands);
2559       output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2560       output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2561       output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2562       output_asm_insn ("addib,>= -4,%2,.-12", operands);
2563       output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2565       /* Handle the residual.  */
2566       if (n_bytes % 4 != 0)
2568 	  if (n_bytes % 4 >= 2)
2569 	    output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2570 	  if (n_bytes % 2 != 0)
2571 	    output_asm_insn ("ldb 0(%1),%6", operands);
2572 	  if (n_bytes % 4 >= 2)
2573 	    output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2574 	  if (n_bytes % 2 != 0)
2575 	    output_asm_insn ("stb %6,0(%0)", operands);
/* --- Byte-aligned path: copy 2 bytes per iteration.  */
2580       /* Pre-adjust the loop counter.  */
2581       operands[4] = GEN_INT (n_bytes - 2);
2582       output_asm_insn ("ldi %4,%2", operands);
2585       output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2586       output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2587       output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2588       output_asm_insn ("addib,>= -2,%2,.-12", operands);
2589       output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2591       /* Handle the residual.  */
2592       if (n_bytes % 2 != 0)
2594 	  output_asm_insn ("ldb 0(%1),%3", operands);
2595 	  output_asm_insn ("stb %3,0(%0)", operands);
2604 /* Count the number of insns necessary to handle this block move.
2606    Basic structure is the same as emit_block_move, except that we
2607    count insns rather than emit them.  */
/* NOTE(review): decimated excerpt; the per-case insn counting between
   the residual tests below is partly elided.  */
2610 compute_movstrsi_length (insn)
2613   rtx pat = PATTERN (insn);
/* ALIGN and N_BYTES are pulled out of the movstrsi pattern operands.  */
2614   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2615   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2616   unsigned int n_insns = 0;
2618   /* We can't move more than four bytes at a time because the PA
2619      has no longer integer move insns.  (Could use fp mem ops?)  */
2623   /* The basic copying loop.  */
/* Residual handling mirrors output_block_move: partial chunk, then
   a possible sub-ALIGN tail.  */
2627   if (n_bytes % (2 * align) != 0)
2629       if ((n_bytes % (2 * align)) >= align)
2632       if ((n_bytes % align) != 0)
2636   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
/* Return the assembly template for a 32-bit AND of operands[1] with
   operands[2] into operands[0].  When the constant mask is a single
   contiguous bit field, use extru (mask reaching bit 31) or depi
   (interior field) instead of a plain "and".
   NOTE(review): decimated excerpt; the contiguity test between the
   scans and the returns is elided.  */
2642 output_and (operands)
2645   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2647       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2648       int ls0, ls1, ms0, p, len;
/* ls0 = lowest zero bit; ls1 = next one bit above it;
   ms0 = next zero bit above that.  */
2650       for (ls0 = 0; ls0 < 32; ls0++)
2651 	if ((mask & (1 << ls0)) == 0)
2654       for (ls1 = ls0; ls1 < 32; ls1++)
2655 	if ((mask & (1 << ls1)) != 0)
2658       for (ms0 = ls1; ms0 < 32; ms0++)
2659 	if ((mask & (1 << ms0)) == 0)
/* Field of LEN low-order bits: extract-right-unsigned.  */
2672 	  operands[2] = GEN_INT (len);
2673 	  return "{extru|extrw,u} %1,31,%2,%0";
2677 	  /* We could use this `depi' for the case above as well, but `depi'
2678 	     requires one more register file access than an `extru'.  */
/* Interior zero field at position P, length LEN: deposit zeros.  */
2683 	  operands[2] = GEN_INT (p);
2684 	  operands[3] = GEN_INT (len);
2685 	  return "{depi|depwi} 0,%2,%3,%0";
/* General case: mask is not a single field (or not a constant).  */
2689     return "and %1,%2,%0";
2692 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2693    storing the result in operands[0].  */
/* 64-bit analogue of output_and: uses extrd,u / depdi when the
   constant mask is a single contiguous bit field.  */
2695 output_64bit_and (operands)
2698   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2700       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2701       int ls0, ls1, ms0, p, len;
/* ls0 = lowest zero bit; ls1 = next one bit; ms0 = next zero bit.  */
2703       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2704 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2707       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2708 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2711       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2712 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
/* Mask must be a single field: no second zero run below the top.  */
2715       if (ms0 != HOST_BITS_PER_WIDE_INT)
2718       if (ls1 == HOST_BITS_PER_WIDE_INT)
/* Field of LEN low-order bits: extract-right-unsigned (doubleword).  */
2725 	  operands[2] = GEN_INT (len);
2726 	  return "extrd,u %1,63,%2,%0";
2730 	  /* We could use this `depi' for the case above as well, but `depi'
2731 	     requires one more register file access than an `extru'.  */
/* Interior zero field at position P, length LEN: deposit zeros.  */
2736 	  operands[2] = GEN_INT (p);
2737 	  operands[3] = GEN_INT (len);
2738 	  return "depdi 0,%2,%3,%0";
2742     return "and %1,%2,%0";
/* Return a string to perform a 32-bit inclusive-or of operands[1] with
   constant operands[2], storing the result in operands[0].  The mask is
   expected to be a single contiguous run of one bits, which is set with
   a single depi instruction.  */
2746 output_ior (operands)
2749   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2750   int bs0, bs1, p, len;
/* OR with zero is just a register copy.  */
2752   if (INTVAL (operands[2]) == 0)
2753     return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = next clear bit above it.  */
2755   for (bs0 = 0; bs0 < 32; bs0++)
2756     if ((mask & (1 << bs0)) != 0)
2759   for (bs1 = bs0; bs1 < 32; bs1++)
2760     if ((mask & (1 << bs1)) == 0)
/* Sanity check: there are no set bits above the run (mask fits).  */
2763   if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
/* Deposit LEN one bits at position P.  */
2769   operands[2] = GEN_INT (p);
2770   operands[3] = GEN_INT (len);
2771   return "{depi|depwi} -1,%2,%3,%0";
2774 /* Return a string to perform a bitwise inclusive-or of operands[1] with
2775    operands[2] storing the result in operands[0].  */
/* 64-bit analogue of output_ior: the constant mask must be a single
   contiguous run of one bits, set with a single depdi instruction.  */
2777 output_64bit_ior (operands)
2780   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2781   int bs0, bs1, p, len;
/* OR with zero is just a register copy.  */
2783   if (INTVAL (operands[2]) == 0)
2784     return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = next clear bit above it.  */
2786   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2787     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2790   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2791     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
/* Sanity check: no set bits above the run.  */
2794   if (bs1 != HOST_BITS_PER_WIDE_INT
2795       && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
/* Deposit LEN one bits at position P.  */
2801   operands[2] = GEN_INT (p);
2802   operands[3] = GEN_INT (len);
2803   return "depdi -1,%2,%3,%0";
2806 /* Target hook for assembling integer objects.  This code handles
2807    aligned SI and DI integers specially, since function references must
2808    be preceded by P%.  */
2811 pa_assemble_integer (x, size, aligned_p)
/* Function labels get a P% plabel prefix; anything else falls back to
   the generic integer-assembly hook.  */
2816   if (size == UNITS_PER_WORD && aligned_p
2817       && function_label_operand (x, VOIDmode))
2819       fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2820       output_addr_const (asm_out_file, x);
2821       fputc ('\n', asm_out_file);
2824   return default_assemble_integer (x, size, aligned_p);
2827 /* Output an ascii string.  */
/* Emit SIZE bytes from P as one or more .STRING directives into FILE,
   escaping quote/backslash and non-printable bytes, and splitting the
   output so no directive exceeds the HP assembler's input-line limit.  */
2829 output_ascii (file, p, size)
2836   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
2838   /* The HP assembler can only take strings of 256 characters at one
2839      time.  This is a limitation on input line length, *not* the
2840      length of the string.  Sigh.  Even worse, it seems that the
2841      restriction is in number of input characters (see \xnn &
2842      \whatever).  So we have to do this very carefully.  */
2844   fputs ("\t.STRING \"", file);
2847   for (i = 0; i < size; i += 4)
/* Encode up to 4 input bytes into PARTIAL_OUTPUT, expanding escapes.  */
2851       for (io = 0, co = 0; io < MIN (4, size - i); io++)
2853 	  register unsigned int c = (unsigned char) p[i + io];
2855 	  if (c == '\"' || c == '\\')
2856 	    partial_output[co++] = '\\';
2857 	  if (c >= ' ' && c < 0177)
2858 	    partial_output[co++] = c;
/* Non-printable byte: emit a \xNN hex escape by hand.  */
2862 	      partial_output[co++] = '\\';
2863 	      partial_output[co++] = 'x';
2864 	      hexd =  c  / 16 - 0 + '0';
2866 		hexd -= '9' - 'a' + 1;
2867 	      partial_output[co++] = hexd;
2868 	      hexd =  c % 16 - 0 + '0';
2870 		hexd -= '9' - 'a' + 1;
2871 	      partial_output[co++] = hexd;
/* Close and reopen the .STRING before the assembler's line limit.  */
2874       if (chars_output + co > 243)
2876 	  fputs ("\"\n\t.STRING \"", file);
2879       fwrite (partial_output, 1, (size_t) co, file);
2883   fputs ("\"\n", file);
2886 /* Try to rewrite floating point comparisons & branches to avoid
2887    useless add,tr insns.
2889    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2890    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
2891    first attempt to remove useless add,tr insns.  It is zero
2892    for the second pass as reorg sometimes leaves bogus REG_DEAD
2895    When CHECK_NOTES is zero we can only eliminate add,tr insns
2896    when there's a 1:1 correspondence between fcmp and ftest/fbranch
/* NOTE(review): decimated excerpt; loop bodies below are missing
   continue/brace lines and the fcmp_count declaration.  */
2899 remove_useless_addtr_insns (check_notes)
2903   static int pass = 0;
2905   /* This is fairly cheap, so always run it when optimizing.  */
2909       int fbranch_count = 0;
2911       /* Walk all the insns in this function looking for fcmp & fbranch
2912 	 instructions.  Keep track of how many of each we find.  */
2913       for (insn = get_insns (); insn; insn = next_insn (insn))
2917 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
2918 	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2921 	  tmp = PATTERN (insn);
2923 	  /* It must be a set.  */
2924 	  if (GET_CODE (tmp) != SET)
2927 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
2928 	  tmp = SET_DEST (tmp);
2929 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2935 	  tmp = PATTERN (insn);
2936 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
2937 	  if (GET_CODE (tmp) == SET
2938 	      && SET_DEST (tmp) == pc_rtx
2939 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2940 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2941 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2942 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2950       /* Find all floating point compare + branch insns.  If possible,
2951 	 reverse the comparison & the branch to avoid add,tr insns.  */
2952       for (insn = get_insns (); insn; insn = next_insn (insn))
2956 	  /* Ignore anything that isn't an INSN.  */
2957 	  if (GET_CODE (insn) != INSN)
2960 	  tmp = PATTERN (insn);
2962 	  /* It must be a set.  */
2963 	  if (GET_CODE (tmp) != SET)
2966 	  /* The destination must be CCFP, which is register zero.  */
2967 	  tmp = SET_DEST (tmp);
2968 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2971 	  /* INSN should be a set of CCFP.
2973 	     See if the result of this insn is used in a reversed FP
2974 	     conditional branch.  If so, reverse our condition and
2975 	     the branch.  Doing so avoids useless add,tr insns.  */
2976 	  next = next_insn (insn);
/* Scan forward for the consumer of CCFP, stopping at anything that
   could redefine or branch around it.  */
2979 	      /* Jumps, calls and labels stop our search.  */
2980 	      if (GET_CODE (next) == JUMP_INSN
2981 		  || GET_CODE (next) == CALL_INSN
2982 		  || GET_CODE (next) == CODE_LABEL)
2985 	      /* As does another fcmp insn.  */
2986 	      if (GET_CODE (next) == INSN
2987 		  && GET_CODE (PATTERN (next)) == SET
2988 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
2989 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
2992 	      next = next_insn (next);
2995 	  /* Is NEXT_INSN a branch?  */
2997 	      && GET_CODE (next) == JUMP_INSN)
2999 	      rtx pattern = PATTERN (next);
3001 	      /* If it a reversed fp conditional branch (eg uses add,tr)
3002 		 and CCFP dies, then reverse our conditional and the branch
3003 		 to avoid the add,tr.  */
3004 	      if (GET_CODE (pattern) == SET
3005 		  && SET_DEST (pattern) == pc_rtx
3006 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3007 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3008 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3009 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3010 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3011 		  && (fcmp_count == fbranch_count
3013 			  && find_regno_note (next, REG_DEAD, 0))))
3015 		  /* Reverse the branch.  */
3016 		  tmp = XEXP (SET_SRC (pattern), 1);
3017 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3018 		  XEXP (SET_SRC (pattern), 2) = tmp;
/* Invalidate the cached recog result so the insn is re-recognized.  */
3019 		  INSN_CODE (next) = -1;
3021 		  /* Reverse our condition.  */
3022 		  tmp = PATTERN (insn);
3023 		  PUT_CODE (XEXP (tmp, 1),
3024 			    (reverse_condition_maybe_unordered
3025 			     (GET_CODE (XEXP (tmp, 1)))));
3035 /* You may have trouble believing this, but this is the 32 bit HP-PA
3040 Variable arguments (optional; any number may be allocated)
3042 SP-(4*(N+9)) arg word N
3047 Fixed arguments (must be allocated; may remain unused)
3056 SP-32 External Data Pointer (DP)
3058 SP-24 External/stub RP (RP')
3062 SP-8 Calling Stub RP (RP'')
3067 SP-0 Stack Pointer (points to next available address)
3071 /* This function saves registers as follows. Registers marked with ' are
3072 this function's registers (as opposed to the previous function's).
3073 If a frame_pointer isn't needed, r4 is saved as a general register;
3074 the space for the frame pointer is still allocated, though, to keep
3080 SP (FP') Previous FP
3081 SP + 4 Alignment filler (sigh)
3082 SP + 8 Space for locals reserved here.
3086 SP + n All call saved register used.
3090 SP + o All call saved fp registers used.
3094 SP + p (SP') points to next available address.
3098 /* Global variables set by output_function_prologue().  */
3099 /* Size of frame.  Need to know this to emit return insns from
3101 static int actual_fsize;
/* local_fsize: locals plus frame-marker portion of the frame;
   save_fregs: set by compute_frame_size when any callee-saved FP
   register must be saved.  */
3102 static int local_fsize, save_fregs;
3104 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3105    Handle case where DISP > 8k by using the add_high_const patterns.
3107    Note in DISP > 8k case, we will leave the high part of the address
3108    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3111 store_reg (reg, disp, base)
3112      int reg, disp, base;
3114   rtx insn, dest, src, basereg;
3116   src = gen_rtx_REG (word_mode, reg);
3117   basereg = gen_rtx_REG (Pmode, base);
/* Small displacement: store directly through BASE+DISP.  */
3118   if (VAL_14_BITS_P (disp))
3120       dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3121       insn = emit_move_insn (dest, src);
/* Large displacement: build the high part in %r1, then store through
   LO_SUM (%r1, disp).  */
3125       rtx delta = GEN_INT (disp);
3126       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3127       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3128       emit_move_insn (tmpreg, high);
3129       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3130       insn = emit_move_insn (dest, src);
/* Attach a REG_FRAME_RELATED_EXPR describing the store relative to
   BASE, since the emitted address went through %r1.  */
3134 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3135 				 gen_rtx_SET (VOIDmode,
3136 					      gen_rtx_MEM (word_mode,
3137 							   gen_rtx_PLUS (word_mode, basereg,
3145     RTX_FRAME_RELATED_P (insn) = 1;
3148 /* Emit RTL to store REG at the memory location specified by BASE and then
3149    add MOD to BASE.  MOD must be <= 8k.  */
3152 store_reg_modify (base, reg, mod)
3155   rtx insn, basereg, srcreg, delta;
/* MOD must fit in a 14-bit immediate for the post-modify store.  */
3157   if (! VAL_14_BITS_P (mod))
3160   basereg = gen_rtx_REG (Pmode, base);
3161   srcreg = gen_rtx_REG (word_mode, reg);
3162   delta = GEN_INT (mod);
3164   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3167       RTX_FRAME_RELATED_P (insn) = 1;
3169       /* RTX_FRAME_RELATED_P must be set on each frame related set
3170 	 in a parallel with more than one element.  Don't set
3171 	 RTX_FRAME_RELATED_P in the first set if reg is temporary
3172 	 register 1. The effect of this operation is recorded in
3173 	 the initial copy.  */
3176 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3177 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3181 	  /* The first element of a PARALLEL is always processed if it is
3182 	     a SET.  Thus, we need an expression list for this case.  */
3184 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3185 				 gen_rtx_SET (VOIDmode, basereg,
3186 					      gen_rtx_PLUS (word_mode, basereg, delta)),
3192 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3193    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3194    whether to add a frame note or not.
3196    In the DISP > 8k case, we leave the high part of the address in %r1.
3197    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3200 set_reg_plus_d (reg, base, disp, note)
3201      int reg, base, disp, note;
/* Small displacement: a single add of BASE+DISP into REG.  */
3205   if (VAL_14_BITS_P (disp))
3207       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3208 			     plus_constant (gen_rtx_REG (Pmode, base), disp));
/* Large displacement: HIGH part into %r1, then LO_SUM into REG.  */
3212       rtx basereg = gen_rtx_REG (Pmode, base);
3213       rtx delta = GEN_INT (disp);
3215       emit_move_insn (gen_rtx_REG (Pmode, 1),
3216 		      gen_rtx_PLUS (Pmode, basereg,
3217 				    gen_rtx_HIGH (Pmode, delta)));
3218       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3219 			     gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3223   if (DO_FRAME_NOTES && note)
3224     RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total stack frame size for the current function given the
   size of its locals (SIZE).  Sets *FREGS_LIVE nonzero when any
   callee-saved FP register needs saving.  Must stay in sync with
   hppa_expand_prologue/epilogue (see comment below).  */
3228 compute_frame_size (size, fregs_live)
3235   /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3236      be consistent with the rounding and size calculation done here.
3237      Change them at the same time.  */
3239   /* We do our own stack alignment.  First, round the size of the
3240      stack locals up to a word boundary.  */
3241   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3243   /* Space for previous frame pointer + filler.  If any frame is
3244      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3245      waste some space here for the sake of HP compatibility.  The
3246      first slot is only used when the frame pointer is needed.  */
3247   if (size || frame_pointer_needed)
3248     size += STARTING_FRAME_OFFSET;
3250   /* If the current function calls __builtin_eh_return, then we need
3251      to allocate stack space for registers that will hold data for
3252      the exception handler.  */
3253   if (DO_FRAME_NOTES && current_function_calls_eh_return)
3257       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3259       size += i * UNITS_PER_WORD;
3262   /* Account for space used by the callee general register saves.  */
3263   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3264     if (regs_ever_live[i])
3265       size += UNITS_PER_WORD;
3267   /* Account for space used by the callee floating point register saves.  */
3268   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3269     if (regs_ever_live[i]
3270 	|| (!TARGET_64BIT && regs_ever_live[i + 1]))
3274 	/* We always save both halves of the FP register, so always
3275 	   increment the frame size by 8 bytes.  */
3279   /* If any of the floating registers are saved, account for the
3280      alignment needed for the floating point register save block.  */
3283       size = (size + 7) & ~7;
3288   /* The various ABIs include space for the outgoing parameters in the
3289      size of the current function's stack frame.  We don't need to align
3290      for the outgoing arguments as their alignment is set by the final
3291      rounding for the frame as a whole.  */
3292   size += current_function_outgoing_args_size;
3294   /* Allocate space for the fixed frame marker.  This space must be
3295      allocated for any function that makes calls or allocates
3297   if (!current_function_is_leaf || size)
3298     size += TARGET_64BIT ? 48 : 32;
3300   /* Finally, round to the preferred stack boundary.  */
3301   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3302 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3305 /* Generate the assembly code for function entry.  FILE is a stdio
3306    stream to output the code to.  SIZE is an int: how many units of
3307    temporary storage to allocate.
3309    Refer to the array `regs_ever_live' to determine which registers to
3310    save; `regs_ever_live[I]' is nonzero if register number I is ever
3311    used in the function.  This function is responsible for knowing
3312    which registers should not be saved even if used.  */
3314 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3315    of memory.  If any fpu reg is used in the function, we allocate
3316    such a block here, at the bottom of the frame, just in case it's needed.
3318    If this function is a leaf procedure, then we may choose not
3319    to do a "save" insn.  The decision about whether or not
3320    to do this is made in regclass.c.  */
3323 pa_output_function_prologue (file, size)
3325      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3327   /* The function's label and associated .PROC must never be
3328      separated and must be output *after* any profiling declarations
3329      to avoid changing spaces/subspaces within a procedure.  */
3330   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3331   fputs ("\t.PROC\n", file);
3333   /* hppa_expand_prologue does the dirty work now.  We just need
3334      to output the assembler directives which denote the start
3336   fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3337   if (regs_ever_live[2])
3338     fputs (",CALLS,SAVE_RP", file);
3340     fputs (",NO_CALLS", file);
3342   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3343      at the beginning of the frame and that it is used as the frame
3344      pointer for the frame.  We do this because our current frame
3345      layout doesn't conform to that specified in the HP runtime
3346      documentation and we need a way to indicate to programs such as
3347      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3348      isn't used by HP compilers but is supported by the assembler.
3349      However, SAVE_SP is supposed to indicate that the previous stack
3350      pointer has been saved in the frame marker.  */
3351   if (frame_pointer_needed)
3352     fputs (",SAVE_SP", file);
3354   /* Pass on information about the number of callee register saves
3355      performed in the prologue.
3357      The compiler is supposed to pass the highest register number
3358      saved, the assembler then has to adjust that number before
3359      entering it into the unwind descriptor (to account for any
3360      caller saved registers with lower register numbers than the
3361      first callee saved register).  */
3363     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3366     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3368   fputs ("\n\t.ENTRY\n", file);
3370   remove_useless_addtr_insns (0);
/* Emit the RTL for the function prologue: save RP, allocate the frame
   (setting up the frame pointer if needed), save callee-saved general
   and floating point registers, and handle EH return data registers.
   NOTE(review): decimated excerpt; several declarations, braces, and
   else arms are missing from the body below.  */
3374 hppa_expand_prologue ()
3376   int merge_sp_adjust_with_store = 0;
3377   int size = get_frame_size ();
3385   /* Compute total size for frame pointer, filler, locals and rounding to
3386      the next word boundary.  Similar code appears in compute_frame_size
3387      and must be changed in tandem with this code.  */
3388   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3389   if (local_fsize || frame_pointer_needed)
3390     local_fsize += STARTING_FRAME_OFFSET;
3392   actual_fsize = compute_frame_size (size, &save_fregs);
3394   /* Compute a few things we will use often.  */
3395   tmpreg = gen_rtx_REG (word_mode, 1);
3397   /* Save RP first.  The calling conventions manual states RP will
3398      always be stored into the caller's frame at sp - 20 or sp - 16
3399      depending on which ABI is in use.  */
3400   if (regs_ever_live[2] || current_function_calls_eh_return)
3401     store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3403   /* Allocate the local frame and set up the frame pointer if needed.  */
3404   if (actual_fsize != 0)
3406       if (frame_pointer_needed)
3408 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3409 	     new stack pointer, then store away the saved old frame pointer
3410 	     into the stack at sp and at the same time update the stack
3411 	     pointer by actual_fsize bytes.  Two versions, first
3412 	     handles small (<8k) frames.  The second handles large (>=8k)
3414 	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3417 	      /* We need to record the frame pointer save here since the
3418 		 new frame pointer is set in the following insn.  */
3419 	      RTX_FRAME_RELATED_P (insn) = 1;
3421 		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3422 				     gen_rtx_SET (VOIDmode,
3423 						  gen_rtx_MEM (word_mode, stack_pointer_rtx),
3428 	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3430 	    RTX_FRAME_RELATED_P (insn) = 1;
/* Small frame: one post-modify store allocates the whole frame.  */
3432 	  if (VAL_14_BITS_P (actual_fsize))
3433 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3436 	      /* It is incorrect to store the saved frame pointer at *sp,
3437 		 then increment sp (writes beyond the current stack boundary).
3439 		 So instead use stwm to store at *sp and post-increment the
3440 		 stack pointer as an atomic operation.  Then increment sp to
3441 		 finish allocating the new frame.  */
3442 	      int adjust1 = 8192 - 64;
3443 	      int adjust2 = actual_fsize - adjust1;
3445 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3446 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3450 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3451 	     we need to store the previous stack pointer (frame pointer)
3452 	     into the frame marker on targets that use the HP unwind
3453 	     library.  This allows the HP unwind library to be used to
3454 	     unwind GCC frames.  However, we are not fully compatible
3455 	     with the HP library because our frame layout differs from
3456 	     that specified in the HP runtime specification.
3458 	     We don't want a frame note on this instruction as the frame
3459 	     marker moves during dynamic stack allocation.
3461 	     This instruction also serves as a blockage to prevent
3462 	     register spills from being scheduled before the stack
3463 	     pointer is raised.  This is necessary as we store
3464 	     registers using the frame pointer as a base register,
3465 	     and the frame pointer is set before sp is raised.  */
3466 	  if (TARGET_HPUX_UNWIND_LIBRARY)
3468 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3469 				       GEN_INT (TARGET_64BIT ? -8 : -4));
3471 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3475 	    emit_insn (gen_blockage ());
3477       /* no frame pointer needed.  */
3480 	  /* In some cases we can perform the first callee register save
3481 	     and allocating the stack frame at the same time.   If so, just
3482 	     make a note of it and defer allocating the frame until saving
3483 	     the callee registers.  */
3484 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3485 	    merge_sp_adjust_with_store = 1;
3486 	  /* Can not optimize.  Adjust the stack frame by actual_fsize
3489 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3494   /* Normal register save.
3496      Do not save the frame pointer in the frame_pointer_needed case.  It
3497      was done earlier.  */
3498   if (frame_pointer_needed)
3500       offset = local_fsize;
3502       /* Saving the EH return data registers in the frame is the simplest
3503 	 way to get the frame unwind information emitted.  We put them
3504 	 just before the general registers.  */
3505       if (DO_FRAME_NOTES && current_function_calls_eh_return)
3507 	  unsigned int i, regno;
3511 	      regno = EH_RETURN_DATA_REGNO (i);
3512 	      if (regno == INVALID_REGNUM)
3515 	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
3516 	      offset += UNITS_PER_WORD;
/* Save callee-saved GRs %r4..%r18 relative to the frame pointer.  */
3520       for (i = 18; i >= 4; i--)
3521 	if (regs_ever_live[i] && ! call_used_regs[i])
3523 	    store_reg (i, offset, FRAME_POINTER_REGNUM);
3524 	    offset += UNITS_PER_WORD;
3527       /* Account for %r3 which is saved in a special place.  */
3530   /* No frame pointer needed.  */
3533       offset = local_fsize - actual_fsize;
3535       /* Saving the EH return data registers in the frame is the simplest
3536 	 way to get the frame unwind information emitted.  */
3537       if (DO_FRAME_NOTES && current_function_calls_eh_return)
3539 	  unsigned int i, regno;
3543 	      regno = EH_RETURN_DATA_REGNO (i);
3544 	      if (regno == INVALID_REGNUM)
3547 	      /* If merge_sp_adjust_with_store is nonzero, then we can
3548 		 optimize the first save.  */
3549 	      if (merge_sp_adjust_with_store)
3551 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3552 		  merge_sp_adjust_with_store = 0;
3555 		store_reg (regno, offset, STACK_POINTER_REGNUM);
3556 	      offset += UNITS_PER_WORD;
/* Save callee-saved GRs %r3..%r18 relative to the stack pointer,
   possibly folding the frame allocation into the first save.  */
3560       for (i = 18; i >= 3; i--)
3561 	if (regs_ever_live[i] && ! call_used_regs[i])
3563 	    /* If merge_sp_adjust_with_store is nonzero, then we can
3564 	       optimize the first GR save.  */
3565 	    if (merge_sp_adjust_with_store)
3567 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3568 		merge_sp_adjust_with_store = 0;
3571 	      store_reg (i, offset, STACK_POINTER_REGNUM);
3572 	    offset += UNITS_PER_WORD;
3576       /* If we wanted to merge the SP adjustment with a GR save, but we never
3577 	 did any GR saves, then just emit the adjustment here.  */
3578       if (merge_sp_adjust_with_store)
3579 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3583   /* The hppa calling conventions say that %r19, the pic offset
3584      register, is saved at sp - 32 (in this function's frame)
3585      when generating PIC code.  FIXME:  What is the correct thing
3586      to do for functions which make no calls and allocate no
3587      frame?  Do we need to allocate a frame, or can we just omit
3588      the save?   For now we'll just omit the save.  */
3589   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3590     store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3592   /* Align pointer properly (doubleword boundary).  */
3593   offset = (offset + 7) & ~7;
3595   /* Floating point register store.  */
3600       /* First get the frame or stack pointer to the start of the FP register
3602       if (frame_pointer_needed)
3604 	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3605 	  base = frame_pointer_rtx;
3609 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3610 	  base = stack_pointer_rtx;
3613       /* Now actually save the FP registers.  */
3614       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3616 	  if (regs_ever_live[i]
3617 	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
3619 	      rtx addr, insn, reg;
/* Store through %r1 (tmpreg) with post-increment addressing.  */
3620 	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3621 	      reg = gen_rtx_REG (DFmode, i);
3622 	      insn = emit_move_insn (addr, reg);
3625 		  RTX_FRAME_RELATED_P (insn) = 1;
/* 64-bit: a single DFmode frame note relative to BASE.  */
3628 		      rtx mem = gen_rtx_MEM (DFmode,
3629 					     plus_constant (base, offset));
3631 			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3632 					     gen_rtx_SET (VOIDmode, mem, reg),
/* 32-bit: describe the save as two SFmode stores (both halves).  */
3637 		      rtx meml = gen_rtx_MEM (SFmode,
3638 					      plus_constant (base, offset));
3639 		      rtx memr = gen_rtx_MEM (SFmode,
3640 					      plus_constant (base, offset + 4));
3641 		      rtx regl = gen_rtx_REG (SFmode, i);
3642 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
3643 		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3644 		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3647 		      RTX_FRAME_RELATED_P (setl) = 1;
3648 		      RTX_FRAME_RELATED_P (setr) = 1;
3649 		      vec = gen_rtvec (2, setl, setr);
3651 			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3652 					     gen_rtx_SEQUENCE (VOIDmode, vec),
3656 	      offset += GET_MODE_SIZE (DFmode);
3663 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3664    Handle case where DISP > 8k by using the add_high_const patterns.  */
3667 load_reg (reg, disp, base)
3668      int reg, disp, base;
3670   rtx src, dest, basereg;
3672   dest = gen_rtx_REG (word_mode, reg);
3673   basereg = gen_rtx_REG (Pmode, base);
/* Small displacement: load directly from BASE+DISP.  */
3674   if (VAL_14_BITS_P (disp))
3676       src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3677       emit_move_insn (dest, src);
/* Large displacement: HIGH part into %r1, then load via LO_SUM.  */
3681       rtx delta = GEN_INT (disp);
3682       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3683       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3684       emit_move_insn (tmpreg, high);
3685       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3686       emit_move_insn (dest, src);
3690 /* Update the total code bytes output to the text section. */
3693 update_total_code_bytes (nbytes)
/* The running total is only meaningful for configurations that place
   all code in the default text section (portable runtime, or SOM
   without GAS); functions in named sections are excluded.  */
3696 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3697 && !IN_NAMED_SECTION_P (cfun->decl))
3699 if (INSN_ADDRESSES_SET_P ())
3701 unsigned long old_total = total_code_bytes;
3703 total_code_bytes += nbytes;
3705 /* Be prepared to handle overflows. */
/* Saturate to all-ones if the unsigned total wrapped around.  */
3706 if (old_total > total_code_bytes)
3707 total_code_bytes = -1;
/* No insn addresses available: the total can no longer be trusted.  */
3710 total_code_bytes = -1;
3714 /* This function generates the assembly code for function exit.
3715 Args are as for output_function_prologue ().
3717 The function epilogue should not depend on the current stack
3718 pointer! It should use the frame pointer only. This is mandatory
3719 because of alloca; we also take advantage of it to omit stack
3720 adjustments before returning. */
3723 pa_output_function_epilogue (file, size)
3725 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3727 rtx insn = get_last_insn ();
3731 /* hppa_expand_epilogue does the dirty work now. We just need
3732 to output the assembler directives which denote the end
3735 To make debuggers happy, emit a nop if the epilogue was completely
3736 eliminated due to a volatile call as the last insn in the
3737 current function. That way the return address (in %r2) will
3738 always point to a valid instruction in the current function. */
3740 /* Get the last real insn. */
3741 if (GET_CODE (insn) == NOTE)
3742 insn = prev_real_insn (insn);
3744 /* If it is a sequence, then look inside. */
3745 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3746 insn = XVECEXP (PATTERN (insn), 0, 0);
3748 /* If insn is a CALL_INSN, then it must be a call to a volatile
3749 function (otherwise there would be epilogue insns). */
3750 if (insn && GET_CODE (insn) == CALL_INSN)
3752 fputs ("\tnop\n", file);
3756 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3758 if (INSN_ADDRESSES_SET_P ())
3760 insn = get_last_nonnote_insn ();
3761 last_address += INSN_ADDRESSES (INSN_UID (insn));
3763 last_address += insn_default_length (insn);
/* Round the end address up to the function alignment boundary.  */
3764 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3765 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3768 /* Finally, update the total number of code bytes output so far. */
3769 update_total_code_bytes (last_address);
/* Emit RTL for the epilogue: restore %r2 (return pointer), the
   callee-saved general registers, the callee-saved FP registers, and
   finally deallocate the stack frame.  */
3773 hppa_expand_epilogue ()
3777 int merge_sp_adjust_with_load = 0;
3780 /* We will use this often. */
3781 tmpreg = gen_rtx_REG (word_mode, 1);
3783 /* Try to restore RP early to avoid load/use interlocks when
3784 RP gets used in the return (bv) instruction. This appears to still
3785 be necessary even when we schedule the prologue and epilogue. */
3786 if (regs_ever_live [2] || current_function_calls_eh_return)
/* RP lives at a fixed slot below the frame: -16 for 64-bit, -20 for
   32-bit runtimes.  */
3788 ret_off = TARGET_64BIT ? -16 : -20;
3789 if (frame_pointer_needed)
3791 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3796 /* No frame pointer, and stack is smaller than 8k. */
3797 if (VAL_14_BITS_P (ret_off - actual_fsize))
3799 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3805 /* General register restores. */
3806 if (frame_pointer_needed)
3808 offset = local_fsize;
3810 /* If the current function calls __builtin_eh_return, then we need
3811 to restore the saved EH data registers. */
3812 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3814 unsigned int i, regno;
3818 regno = EH_RETURN_DATA_REGNO (i);
3819 if (regno == INVALID_REGNUM)
3822 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3823 offset += UNITS_PER_WORD;
3827 for (i = 18; i >= 4; i--)
3828 if (regs_ever_live[i] && ! call_used_regs[i])
3830 load_reg (i, offset, FRAME_POINTER_REGNUM);
3831 offset += UNITS_PER_WORD;
/* No frame pointer: restores are relative to the stack pointer.  */
3836 offset = local_fsize - actual_fsize;
3838 /* If the current function calls __builtin_eh_return, then we need
3839 to restore the saved EH data registers. */
3840 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3842 unsigned int i, regno;
3846 regno = EH_RETURN_DATA_REGNO (i);
3847 if (regno == INVALID_REGNUM)
3850 /* Only for the first load.
3851 merge_sp_adjust_with_load holds the register load
3852 with which we will merge the sp adjustment. */
3853 if (merge_sp_adjust_with_load == 0
3855 && VAL_14_BITS_P (-actual_fsize))
3856 merge_sp_adjust_with_load = regno;
3858 load_reg (regno, offset, STACK_POINTER_REGNUM);
3859 offset += UNITS_PER_WORD;
3863 for (i = 18; i >= 3; i--)
3865 if (regs_ever_live[i] && ! call_used_regs[i])
3867 /* Only for the first load.
3868 merge_sp_adjust_with_load holds the register load
3869 with which we will merge the sp adjustment. */
3870 if (merge_sp_adjust_with_load == 0
3872 && VAL_14_BITS_P (-actual_fsize))
3873 merge_sp_adjust_with_load = i;
3875 load_reg (i, offset, STACK_POINTER_REGNUM);
3876 offset += UNITS_PER_WORD;
3881 /* Align pointer properly (doubleword boundary). */
3882 offset = (offset + 7) & ~7;
3884 /* FP register restores. */
3887 /* Adjust the register to index off of. */
3888 if (frame_pointer_needed)
3889 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3891 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3893 /* Actually do the restores now. */
/* %r1 (tmpreg) walks through the FP save area via post-increment.  */
3894 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3895 if (regs_ever_live[i]
3896 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3898 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3899 rtx dest = gen_rtx_REG (DFmode, i);
3900 emit_move_insn (dest, src);
3904 /* Emit a blockage insn here to keep these insns from being moved to
3905 an earlier spot in the epilogue, or into the main instruction stream.
3907 This is necessary as we must not cut the stack back before all the
3908 restores are finished. */
3909 emit_insn (gen_blockage ());
3911 /* Reset stack pointer (and possibly frame pointer). The stack
3912 pointer is initially set to fp + 64 to avoid a race condition. */
3913 if (frame_pointer_needed)
3915 rtx delta = GEN_INT (-64);
3917 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
3918 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3920 /* If we were deferring a callee register restore, do it now. */
3921 else if (merge_sp_adjust_with_load)
3923 rtx delta = GEN_INT (-actual_fsize);
3924 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3926 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3928 else if (actual_fsize != 0)
3929 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3932 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3933 frame greater than 8k), do so now. */
3935 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3937 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3939 rtx sa = EH_RETURN_STACKADJ_RTX;
3941 emit_insn (gen_blockage ());
3942 emit_insn (TARGET_64BIT
3943 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3944 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return an rtx holding the value the PIC register had on entry to the
   current function.  */
3949 hppa_pic_save_rtx ()
3951 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
/* Emit RTL to call the _mcount profiling routine.  LABEL_NO selects the
   per-call-site counter label ("LP<n>") when profile counters are
   enabled.  The function-begin label is passed via the call pattern.  */
3955 hppa_profile_hook (label_no)
3958 rtx begin_label_rtx, call_insn;
3959 char begin_label_name[16];
3961 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3963 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3966 emit_move_insn (arg_pointer_rtx,
3967 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
/* _mcount expects the caller's return pointer (%r2) in %r26.  */
3970 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3972 #ifndef NO_PROFILE_COUNTERS
3974 rtx count_label_rtx, addr, r24;
3975 char count_label_name[16];
3977 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3978 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
/* Pass the counter's address in %r24.  */
3980 addr = force_reg (Pmode, count_label_rtx);
3981 r24 = gen_rtx_REG (Pmode, 24);
3982 emit_move_insn (r24, addr);
3984 /* %r25 is set from within the output pattern. */
3986 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3987 GEN_INT (TARGET_64BIT ? 24 : 12),
3990 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3993 /* %r25 is set from within the output pattern. */
3995 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3996 GEN_INT (TARGET_64BIT ? 16 : 8),
4000 /* Indicate the _mcount call cannot throw, nor will it execute a
4002 REG_NOTES (call_insn)
4003 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4007 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
4009 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
/* Reload the PIC register from its entry value after the call.  */
4011 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
4015 /* Fetch the return address for the frame COUNT steps up from
4016 the current frame, after the prologue. FRAMEADDR is the
4017 frame pointer of the COUNT frame.
4019 We want to ignore any export stub remnants here. To handle this,
4020 we examine the code at the return address, and if it is an export
4021 stub, we return a memory rtx for the stub return address stored
4024 The value returned is used in two different ways:
4026 1. To find a function's caller.
4028 2. To change the return address for a function.
4030 This function handles most instances of case 1; however, it will
4031 fail if there are two levels of stubs to execute on the return
4032 path. The only way I believe that can happen is if the return value
4033 needs a parameter relocation, which never happens for C code.
4035 This function handles most instances of case 2; however, it will
4036 fail if we did not originally have stub code on the return path
4037 but will need stub code on the new return path. This can happen if
4038 the caller & callee are both in the main program, but the new
4039 return location is in a shared library. */
4042 return_addr_rtx (count, frameaddr)
4054 rp = get_hard_reg_initial_val (Pmode, 2);
/* 64-bit and no-space-register configurations never use export
   stubs, so the raw return pointer is correct as-is.  */
4056 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4059 saved_rp = gen_reg_rtx (Pmode);
4060 emit_move_insn (saved_rp, rp);
4062 /* Get pointer to the instruction stream. We have to mask out the
4063 privilege level from the two low order bits of the return address
4064 pointer here so that ins will point to the start of the first
4065 instruction that would have been executed if we returned. */
4066 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4067 label = gen_label_rtx ();
4069 /* Check the instruction stream at the normal return address for the
4072 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4073 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4074 0x00011820 | stub+16: mtsp r1,sr0
4075 0xe0400002 | stub+20: be,n 0(sr0,rp)
4077 If it is an export stub, than our return address is really in
/* Compare each of the four stub words in turn; any mismatch jumps
   past the stub handling to LABEL.  */
4080 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4081 NULL_RTX, SImode, 1);
4082 emit_jump_insn (gen_bne (label));
4084 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4085 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4086 emit_jump_insn (gen_bne (label));
4088 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4089 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4090 emit_jump_insn (gen_bne (label));
4092 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4093 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4095 /* If there is no export stub then just use the value saved from
4096 the return pointer register. */
4098 emit_jump_insn (gen_bne (label));
4100 /* Here we know that our return address points to an export
4101 stub. We don't want to return the address of the export stub,
4102 but rather the return address of the export stub. That return
4103 address is stored at -24[frameaddr]. */
4105 emit_move_insn (saved_rp,
4107 memory_address (Pmode,
4108 plus_constant (frameaddr,
4115 /* This is only valid once reload has completed because it depends on
4116 knowing exactly how much (if any) frame there is and...
4118 It's only valid if there is no frame marker to de-allocate and...
4120 It's only valid if %r2 hasn't been saved into the caller's frame
4121 (we're not profiling and %r2 isn't live anywhere). */
4123 hppa_can_use_return_insn_p ()
/* A bare "return" insn is usable only when there is literally no
   frame to tear down.  */
4125 return (reload_completed
4126 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4127 && ! regs_ever_live[2]
4128 && ! frame_pointer_needed);
/* Emit a conditional branch to label OPERAND0 that tests the FP status
   register (CCFP register 0) with comparison CODE.  */
4132 emit_bcond_fp (code, operand0)
4136 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4137 gen_rtx_IF_THEN_ELSE (VOIDmode,
4138 gen_rtx_fmt_ee (code,
4140 gen_rtx_REG (CCFPmode, 0),
4142 gen_rtx_LABEL_REF (VOIDmode, operand0),
/* Build (but do not emit) a SET of the FP status register to the
   comparison CODE of OPERAND0 and OPERAND1.  */
4148 gen_cmp_fp (code, operand0, operand1)
4150 rtx operand0, operand1;
4152 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4153 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4156 /* Adjust the cost of a scheduling dependency. Return the new cost of
4157 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4160 pa_adjust_cost (insn, link, dep_insn, cost)
4166 enum attr_type attr_type;
4168 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4169 true dependencies as they are described with bypasses now. */
4170 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4173 if (! recog_memoized (insn))
4176 attr_type = get_attr_type (insn);
4178 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4180 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4183 if (attr_type == TYPE_FPLOAD)
4185 rtx pat = PATTERN (insn);
4186 rtx dep_pat = PATTERN (dep_insn);
4187 if (GET_CODE (pat) == PARALLEL)
4189 /* This happens for the fldXs,mb patterns. */
4190 pat = XVECEXP (pat, 0, 0);
4192 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4193 /* If this happens, we have to extend this to schedule
4194 optimally. Return 0 for now. */
4197 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4199 if (! recog_memoized (dep_insn))
4201 switch (get_attr_type (dep_insn))
4208 case TYPE_FPSQRTSGL:
4209 case TYPE_FPSQRTDBL:
4210 /* A fpload can't be issued until one cycle before a
4211 preceding arithmetic operation has finished if
4212 the target of the fpload is any of the sources
4213 (or destination) of the arithmetic operation. */
4214 return insn_default_latency (dep_insn) - 1;
4221 else if (attr_type == TYPE_FPALU)
4223 rtx pat = PATTERN (insn);
4224 rtx dep_pat = PATTERN (dep_insn);
4225 if (GET_CODE (pat) == PARALLEL)
4227 /* This happens for the fldXs,mb patterns. */
4228 pat = XVECEXP (pat, 0, 0);
4230 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4231 /* If this happens, we have to extend this to schedule
4232 optimally. Return 0 for now. */
4235 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4237 if (! recog_memoized (dep_insn))
4239 switch (get_attr_type (dep_insn))
4243 case TYPE_FPSQRTSGL:
4244 case TYPE_FPSQRTDBL:
4245 /* An ALU flop can't be issued until two cycles before a
4246 preceding divide or sqrt operation has finished if
4247 the target of the ALU flop is any of the sources
4248 (or destination) of the divide or sqrt operation. */
4249 return insn_default_latency (dep_insn) - 2;
4257 /* For other anti dependencies, the cost is 0. */
4260 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4262 /* Output dependency; DEP_INSN writes a register that INSN writes some
4264 if (attr_type == TYPE_FPLOAD)
4266 rtx pat = PATTERN (insn);
4267 rtx dep_pat = PATTERN (dep_insn);
4268 if (GET_CODE (pat) == PARALLEL)
4270 /* This happens for the fldXs,mb patterns. */
4271 pat = XVECEXP (pat, 0, 0);
4273 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4274 /* If this happens, we have to extend this to schedule
4275 optimally. Return 0 for now. */
4278 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4280 if (! recog_memoized (dep_insn))
4282 switch (get_attr_type (dep_insn))
4289 case TYPE_FPSQRTSGL:
4290 case TYPE_FPSQRTDBL:
4291 /* A fpload can't be issued until one cycle before a
4292 preceding arithmetic operation has finished if
4293 the target of the fpload is the destination of the
4294 arithmetic operation.
4296 Exception: For PA7100LC, PA7200 and PA7300, the cost
4297 is 3 cycles, unless they bundle together. We also
4298 pay the penalty if the second insn is a fpload. */
4299 return insn_default_latency (dep_insn) - 1;
4306 else if (attr_type == TYPE_FPALU)
4308 rtx pat = PATTERN (insn);
4309 rtx dep_pat = PATTERN (dep_insn);
4310 if (GET_CODE (pat) == PARALLEL)
4312 /* This happens for the fldXs,mb patterns. */
4313 pat = XVECEXP (pat, 0, 0);
4315 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4316 /* If this happens, we have to extend this to schedule
4317 optimally. Return 0 for now. */
4320 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4322 if (! recog_memoized (dep_insn))
4324 switch (get_attr_type (dep_insn))
4328 case TYPE_FPSQRTSGL:
4329 case TYPE_FPSQRTDBL:
4330 /* An ALU flop can't be issued until two cycles before a
4331 preceding divide or sqrt operation has finished if
4332 the target of the ALU flop is also the target of
4333 the divide or sqrt operation. */
4334 return insn_default_latency (dep_insn) - 2;
4342 /* For other output dependencies, the cost is 0. */
4349 /* Adjust scheduling priorities. We use this to try and keep addil
4350 and the next use of %r1 close together. */
4352 pa_adjust_priority (insn, priority)
4356 rtx set = single_set (insn);
4360 src = SET_SRC (set);
4361 dest = SET_DEST (set);
/* LO_SUM of a non-read-only symbol: the consumer half of an
   addil/ldo pair.  */
4362 if (GET_CODE (src) == LO_SUM
4363 && symbolic_operand (XEXP (src, 1), VOIDmode)
4364 && ! read_only_operand (XEXP (src, 1), VOIDmode))
/* A load whose address is a LO_SUM of such a symbol.  */
4367 else if (GET_CODE (src) == MEM
4368 && GET_CODE (XEXP (src, 0)) == LO_SUM
4369 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4370 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
/* A store whose address is such a LO_SUM.  */
4373 else if (GET_CODE (dest) == MEM
4374 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4375 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4376 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4382 /* The 700 can only issue a single insn at a time.
4383 The 7XXX processors can issue two insns at a time.
4384 The 8000 can issue 4 insns at a time. */
/* Dispatch on pa_cpu; each case returns the processor's issue width.  */
4390 case PROCESSOR_700: return 1;
4391 case PROCESSOR_7100: return 2;
4392 case PROCESSOR_7100LC: return 2;
4393 case PROCESSOR_7200: return 2;
4394 case PROCESSOR_7300: return 2;
4395 case PROCESSOR_8000: return 4;
4404 /* Return any length adjustment needed by INSN which already has its length
4405 computed as LENGTH. Return zero if no adjustment is necessary.
4407 For the PA: function calls, millicode calls, and backwards short
4408 conditional branches with unfilled delay slots need an adjustment by +1
4409 (to account for the NOP which will be inserted into the instruction stream).
4411 Also compute the length of an inline block move here as it is too
4412 complicated to express as a length attribute in pa.md. */
4414 pa_adjust_insn_length (insn, length)
4418 rtx pat = PATTERN (insn);
4420 /* Call insns which are *not* indirect and have unfilled delay slots. */
4421 if (GET_CODE (insn) == CALL_INSN)
4424 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4425 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
/* Call with a return-value SET wrapping the CALL rtx.  */
4427 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4428 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4434 /* Jumps inside switch tables which have unfilled delay slots
4435 also need adjustment. */
4436 else if (GET_CODE (insn) == JUMP_INSN
4437 && simplejump_p (insn)
4438 && GET_MODE (insn) == SImode)
4440 /* Millicode insn with an unfilled delay slot. */
4441 else if (GET_CODE (insn) == INSN
4442 && GET_CODE (pat) != SEQUENCE
4443 && GET_CODE (pat) != USE
4444 && GET_CODE (pat) != CLOBBER
4445 && get_attr_type (insn) == TYPE_MILLI)
4447 /* Block move pattern. */
4448 else if (GET_CODE (insn) == INSN
4449 && GET_CODE (pat) == PARALLEL
4450 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4451 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4452 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4453 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4454 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4455 return compute_movstrsi_length (insn) - 4;
4456 /* Conditional branch with an unfilled delay slot. */
4457 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4459 /* Adjust a short backwards conditional with an unfilled delay slot. */
4460 if (GET_CODE (pat) == SET
4462 && ! forward_branch_p (insn))
4464 else if (GET_CODE (pat) == PARALLEL
4465 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4468 /* Adjust dbra insn with short backwards conditional branch with
4469 unfilled delay slot -- only for case where counter is in a
4470 general register register. */
4471 else if (GET_CODE (pat) == PARALLEL
4472 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4473 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4474 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4476 && ! forward_branch_p (insn))
4484 /* Print operand X (an rtx) in assembler syntax to file FILE.
4485 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4486 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4489 print_operand (file, x, code)
4497 /* Output a 'nop' if there's nothing for the delay slot. */
4498 if (dbr_sequence_length () == 0)
4499 fputs ("\n\tnop", file);
4502 /* Output a nullification completer if there's nothing for the */
4503 /* delay slot or nullification is requested. */
4504 if (dbr_sequence_length () == 0 ||
4506 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4510 /* Print out the second register name of a register pair.
4511 I.e., R (6) => 7. */
4512 fputs (reg_names[REGNO (x) + 1], file);
4515 /* A register or zero. */
4517 || (x == CONST0_RTX (DFmode))
4518 || (x == CONST0_RTX (SFmode)))
4520 fputs ("%r0", file);
4526 /* A register or zero (floating point). */
4528 || (x == CONST0_RTX (DFmode))
4529 || (x == CONST0_RTX (SFmode)))
4531 fputs ("%fr0", file);
/* Symbolic address: print "symbol(basereg)".  */
4540 xoperands[0] = XEXP (XEXP (x, 0), 0);
4541 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4542 output_global_address (file, xoperands[1], 0);
4543 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4547 case 'C': /* Plain (C)ondition */
4549 switch (GET_CODE (x))
4552 fputs ("=", file); break;
4554 fputs ("<>", file); break;
4556 fputs (">", file); break;
4558 fputs (">=", file); break;
4560 fputs (">>=", file); break;
4562 fputs (">>", file); break;
4564 fputs ("<", file); break;
4566 fputs ("<=", file); break;
4568 fputs ("<<=", file); break;
4570 fputs ("<<", file); break;
4575 case 'N': /* Condition, (N)egated */
4576 switch (GET_CODE (x))
4579 fputs ("<>", file); break;
4581 fputs ("=", file); break;
4583 fputs ("<=", file); break;
4585 fputs ("<", file); break;
4587 fputs ("<<", file); break;
4589 fputs ("<<=", file); break;
4591 fputs (">=", file); break;
4593 fputs (">", file); break;
4595 fputs (">>", file); break;
4597 fputs (">>=", file); break;
4602 /* For floating point comparisons. Note that the output
4603 predicates are the complement of the desired mode. */
4605 switch (GET_CODE (x))
4608 fputs ("!=", file); break;
4610 fputs ("=", file); break;
4612 fputs ("!>", file); break;
4614 fputs ("!>=", file); break;
4616 fputs ("!<", file); break;
4618 fputs ("!<=", file); break;
4620 fputs ("!<>", file); break;
4622 fputs (">", file); break;
4624 fputs (">=", file); break;
4626 fputs ("<", file); break;
4628 fputs ("<=", file); break;
4630 fputs ("<>", file); break;
4632 fputs ("<=>", file); break;
4634 fputs ("!<=>", file); break;
4639 case 'S': /* Condition, operands are (S)wapped. */
4640 switch (GET_CODE (x))
4643 fputs ("=", file); break;
4645 fputs ("<>", file); break;
4647 fputs ("<", file); break;
4649 fputs ("<=", file); break;
4651 fputs ("<<=", file); break;
4653 fputs ("<<", file); break;
4655 fputs (">", file); break;
4657 fputs (">=", file); break;
4659 fputs (">>=", file); break;
4661 fputs (">>", file); break;
4666 case 'B': /* Condition, (B)oth swapped and negate. */
4667 switch (GET_CODE (x))
4670 fputs ("<>", file); break;
4672 fputs ("=", file); break;
4674 fputs (">=", file); break;
4676 fputs (">", file); break;
4678 fputs (">>", file); break;
4680 fputs (">>=", file); break;
4682 fputs ("<=", file); break;
4684 fputs ("<", file); break;
4686 fputs ("<<", file); break;
4688 fputs ("<<=", file); break;
/* The following cases print transformed CONST_INT operands; the
   exact case letters are on lines elided from this listing.  */
4694 if (GET_CODE (x) == CONST_INT)
4696 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4701 if (GET_CODE (x) == CONST_INT)
4703 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4708 if (GET_CODE (x) == CONST_INT)
4710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4715 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4717 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4722 if (GET_CODE (x) == CONST_INT)
4724 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4729 if (GET_CODE (x) == CONST_INT)
4731 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4736 if (GET_CODE (x) == CONST_INT)
/* Memory-operand completers (,mb ,ma x,s) depend on the address form
   and on the assembler dialect.  */
4741 switch (GET_CODE (XEXP (x, 0)))
4745 if (ASSEMBLER_DIALECT == 0)
4746 fputs ("s,mb", file);
4748 fputs (",mb", file);
4752 if (ASSEMBLER_DIALECT == 0)
4753 fputs ("s,ma", file);
4755 fputs (",ma", file);
4758 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4759 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4761 if (ASSEMBLER_DIALECT == 0)
4762 fputs ("x,s", file);
4766 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4770 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4776 output_global_address (file, x, 0);
4779 output_global_address (file, x, 1);
4781 case 0: /* Don't do anything special */
/* Emit zdep/zvdep operand triples computed from a mask constant.  */
4786 compute_zdepwi_operands (INTVAL (x), op);
4787 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4793 compute_zdepdi_operands (INTVAL (x), op);
4794 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4798 /* We can get here from a .vtable_inherit due to our
4799 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Fallthrough operand printing for plain registers, memory, and
   constants (no code letter handling applies).  */
4805 if (GET_CODE (x) == REG)
4807 fputs (reg_names [REGNO (x)], file);
4808 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4814 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4815 && (REGNO (x) & 1) == 0)
4818 else if (GET_CODE (x) == MEM)
4820 int size = GET_MODE_SIZE (GET_MODE (x));
4821 rtx base = NULL_RTX;
4822 switch (GET_CODE (XEXP (x, 0)))
4826 base = XEXP (XEXP (x, 0), 0);
4827 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4831 base = XEXP (XEXP (x, 0), 0);
4832 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4835 if (GET_CODE (XEXP (x, 0)) == PLUS
4836 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4837 fprintf (file, "%s(%s)",
4838 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4839 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4840 else if (GET_CODE (XEXP (x, 0)) == PLUS
4841 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4842 fprintf (file, "%s(%s)",
4843 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4844 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4846 output_address (XEXP (x, 0));
4851 output_addr_const (file, x);
4854 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4857 output_global_address (file, x, round_constant)
4863 /* Imagine (high (const (plus ...))). */
4864 if (GET_CODE (x) == HIGH)
4867 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4868 assemble_name (file, XSTR (x, 0));
/* Non-PIC data symbols are emitted relative to $global$.  */
4869 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4871 assemble_name (file, XSTR (x, 0));
4872 fputs ("-$global$", file);
4874 else if (GET_CODE (x) == CONST)
4876 const char *sep = "";
4877 int offset = 0; /* assembler wants -$global$ at end */
4878 rtx base = NULL_RTX;
/* Pick the SYMBOL_REF and CONST_INT parts out of either operand
   of the (plus/minus ...) inside the CONST.  */
4880 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4882 base = XEXP (XEXP (x, 0), 0);
4883 output_addr_const (file, base);
4885 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4886 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4889 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4891 base = XEXP (XEXP (x, 0), 1);
4892 output_addr_const (file, base);
4894 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4895 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4898 /* How bogus. The compiler is apparently responsible for
4899 rounding the constant if it uses an LR field selector.
4901 The linker and/or assembler seem a better place since
4902 they have to do this kind of thing already.
4904 If we fail to do this, HP's optimizing linker may eliminate
4905 an addil, but not update the ldw/stw/ldo instruction that
4906 uses the result of the addil. */
4908 offset = ((offset + 0x1000) & ~0x1fff);
4910 if (GET_CODE (XEXP (x, 0)) == PLUS)
4920 else if (GET_CODE (XEXP (x, 0)) == MINUS
4921 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4925 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4926 fputs ("-$global$", file);
4928 fprintf (file, "%s%d", sep, offset);
4931 output_addr_const (file, x);
4934 /* Output boilerplate text to appear at the beginning of the file.
4935 There are several possible versions. */
4936 #define aputs(x) fputs(x, asm_out_file)
/* Emit the .LEVEL directive matching the target architecture level.  */
4938 pa_file_start_level ()
4941 aputs ("\t.LEVEL 2.0w\n");
4942 else if (TARGET_PA_20)
4943 aputs ("\t.LEVEL 2.0\n");
4944 else if (TARGET_PA_11)
4945 aputs ("\t.LEVEL 1.1\n");
4947 aputs ("\t.LEVEL 1.0\n");
/* Emit the SOM .SPACE/.SUBSPA declarations for the private (data/bss)
   and text spaces.  SORTSPACE presumably selects sorted subspace
   output -- the conditional using it is elided here; confirm against
   the full source.  */
4951 pa_file_start_space (sortspace)
4954 aputs ("\t.SPACE $PRIVATE$");
4957 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
4958 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
4959 "\n\t.SPACE $TEXT$");
4962 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
4963 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
/* Emit the .file directive (when debugging) and, per WANT_VERSION, a
   .version directive.  */
4967 pa_file_start_file (want_version)
4970 if (write_symbols != NO_DEBUG)
4972 output_file_directive (asm_out_file, main_input_filename);
4974 aputs ("\t.version\t\"01.01\"\n");
/* When profiling, import _mcount with the given symbol kind ASWHAT
   ("CODE" or "ENTRY" depending on the assembler).  */
4979 pa_file_start_mcount (aswhat)
4983 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
/* File-start hook for ELF targets.  */
4987 pa_elf_file_start ()
4989 pa_file_start_level ();
4990 pa_file_start_mcount ("ENTRY");
4991 pa_file_start_file (0);
/* File-start hook for SOM targets: also declares spaces and imports
   $global$ and the $$dyncall millicode routine.  */
4995 pa_som_file_start ()
4997 pa_file_start_level ();
4998 pa_file_start_space (0);
4999 aputs ("\t.IMPORT $global$,DATA\n"
5000 "\t.IMPORT $$dyncall,MILLICODE\n");
5001 pa_file_start_mcount ("CODE");
5002 pa_file_start_file (0);
/* File-start hook for Linux targets.  */
5006 pa_linux_file_start ()
5008 pa_file_start_file (1);
5009 pa_file_start_level ();
5010 pa_file_start_mcount ("CODE");
/* File-start hook for 64-bit HP-UX with GNU as.  */
5014 pa_hpux64_gas_file_start ()
5016 pa_file_start_level ();
5017 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
/* GNU as wants _mcount typed as a function when profiling.  */
5019 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5021 pa_file_start_file (1);
/* File-start hook for 64-bit HP-UX with the HP assembler.  */
5025 pa_hpux64_hpas_file_start ()
5027 pa_file_start_level ();
5028 pa_file_start_space (1);
5029 pa_file_start_mcount ("CODE");
5030 pa_file_start_file (0);
/* Return the deferred-plabel entry for function FNAME, creating one
   (with a fresh internal label) on first use.  The entries are emitted
   later by output_deferred_plabels.  */
5034 static struct deferred_plabel *
5040 /* See if we have already put this function on the list of deferred
5041 plabels. This list is generally small, so a liner search is not
5042 too ugly. If it proves too slow replace it with something faster. */
5043 for (i = 0; i < n_deferred_plabels; i++)
5044 if (strcmp (fname, deferred_plabels[i].name) == 0)
5047 /* If the deferred plabel list is empty, or this entry was not found
5048 on the list, create a new entry on the list. */
5049 if (deferred_plabels == NULL || i == n_deferred_plabels)
5051 const char *real_name;
/* Grow the GC-allocated array by one entry.  */
5053 if (deferred_plabels == 0)
5054 deferred_plabels = (struct deferred_plabel *)
5055 ggc_alloc (sizeof (struct deferred_plabel));
5057 deferred_plabels = (struct deferred_plabel *)
5058 ggc_realloc (deferred_plabels,
5059 ((n_deferred_plabels + 1)
5060 * sizeof (struct deferred_plabel)));
5062 i = n_deferred_plabels++;
5063 deferred_plabels[i].internal_label = gen_label_rtx ();
5064 deferred_plabels[i].name = ggc_strdup (fname);
5066 /* Gross. We have just implicitly taken the address of this function,
5068 real_name = (*targetm.strip_name_encoding) (fname);
5069 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5072 return &deferred_plabels[i];
/* Emit all deferred plabels collected by get_plabel: one pointer-sized
   word per entry, each preceded by its internal label.  */
5076 output_deferred_plabels ()
5079 /* If we have deferred plabels, then we need to switch into the data
5080 section and align it to a 4 byte boundary before we output the
5081 deferred plabels. */
5082 if (n_deferred_plabels)
5085 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5088 /* Now output the deferred plabels. */
5089 for (i = 0; i < n_deferred_plabels; i++)
5091 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5092 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5093 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5094 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5098 /* HP's millicode routines mean something special to the assembler.
5099 Keep track of which ones we have used. */
5101 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5102 static void import_milli PARAMS ((enum millicodes));
/* imported[i] is nonzero once the .IMPORT for millicode i has been
   emitted for the current translation unit.  */
5103 static char imported[(int) end1000];
5104 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5105 static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." name placeholder within import_string; the
   4-character millicode name is copied over it.  */
5106 #define MILLI_START 10
5110 enum millicodes code;
5112 char str[sizeof (import_string)];
5114 if (!imported[(int) code])
5116 imported[(int) code] = 1;
5117 strcpy (str, import_string);
5118 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5119 output_asm_insn (str, 0);
5123 /* The register constraints have put the operands and return value in
5124 the proper registers. */
5127 output_mul_insn (unsignedp, insn)
5128 int unsignedp ATTRIBUTE_UNUSED;
5131 import_milli (mulI);
5132 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor (0..15); nonzero means a $$divI_<n>/$$divU_<n> millicode
   routine exists for that divisor.
   NOTE(review): the original listing truncated this initializer
   mid-list; the final two entries (divisors 14 and 15) have been
   restored.  */
static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
				 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5148 div_operand (op, mode)
5150 enum machine_mode mode;
5152 return (mode == SImode
5153 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5154 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5155 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
/* Emit the rtl for a division by the constant in operands[2] when a
   magic divide millicode exists for it.  The dividend is moved into
   %r26 and the quotient is read back from %r29; the PARALLEL records
   the registers the millicode call clobbers.  NOTE(review): the
   function header, parameter declarations, the emit_insn wrapper and
   the return statements are missing from this listing — presumably it
   returns nonzero when it emitted the division; confirm against the
   full source.  */
5159 emit_hpdiv_const (operands, unsignedp)
5163 if (GET_CODE (operands[2]) == CONST_INT
5164 && INTVAL (operands[2]) > 0
5165 && INTVAL (operands[2]) < 16
5166 && magic_milli[INTVAL (operands[2])])
/* Millicode return pointer: %r2 for the 64-bit runtime, %r31 for
   the 32-bit runtime.  */
5168 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5170 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5173 (PARALLEL, VOIDmode,
5174 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5175 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5177 gen_rtx_REG (SImode, 26),
5179 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5180 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5181 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5182 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5183 gen_rtx_CLOBBER (VOIDmode, ret))));
5184 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Output the assembler for a divide millicode call.  For a special
   constant divisor (operands[0]) a divisor-specific $$divI_<n> or
   $$divU_<n> routine is used; otherwise the generic $$divI/$$divU.
   NOTE(review): the function header, local declarations, braces and
   the if/else lines selecting signed vs. unsigned are missing from
   this listing.  */
5191 output_div_insn (operands, unsignedp, insn)
5198 /* If the divisor is a constant, try to use one of the special
5200 if (GET_CODE (operands[0]) == CONST_INT)
5202 static char buf[100];
5203 divisor = INTVAL (operands[0]);
/* Emit the .IMPORT only once per (divisor, signedness) pair.  */
5204 if (!div_milli[divisor][unsignedp])
5206 div_milli[divisor][unsignedp] = 1;
5208 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5210 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5214 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5215 INTVAL (operands[0]));
5216 return output_millicode_call (insn,
5217 gen_rtx_SYMBOL_REF (SImode, buf));
5221 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5222 INTVAL (operands[0]));
5223 return output_millicode_call (insn,
5224 gen_rtx_SYMBOL_REF (SImode, buf));
5227 /* Divisor isn't a special constant. */
5232 import_milli (divU);
5233 return output_millicode_call (insn,
5234 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5238 import_milli (divI);
5239 return output_millicode_call (insn,
5240 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5245 /* Output a $$rem millicode to do mod. */
5248 output_mod_insn (unsignedp, insn)
5254 import_milli (remU);
5255 return output_millicode_call (insn,
5256 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5260 import_milli (remI);
5261 return output_millicode_call (insn,
5262 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the .CALL argument-location descriptor (ARGW0..ARGW3) for
   CALL_INSN, describing whether each argument word is in a general
   register (GR), a floating register (FR) or the upper half of an FR
   pair (FU).  Not used for the 64-bit runtime or ELF32.
   NOTE(review): braces, `return`/`else` lines and parts of some
   conditionals are missing from this listing.  */
5267 output_arg_descriptor (call_insn)
5270 const char *arg_regs[4];
5271 enum machine_mode arg_mode;
5273 int i, output_flag = 0;
5276 /* We neither need nor want argument location descriptors for the
5277 64bit runtime environment or the ELF32 environment. */
5278 if (TARGET_64BIT || TARGET_ELF32)
5281 for (i = 0; i < 4; i++)
5284 /* Specify explicitly that no argument relocations should take place
5285 if using the portable runtime calling conventions. */
5286 if (TARGET_PORTABLE_RUNTIME)
5288 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5293 if (GET_CODE (call_insn) != CALL_INSN)
/* Walk the USEs attached to the call to discover which argument
   registers are live at the call.  */
5295 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5297 rtx use = XEXP (link, 0);
5299 if (! (GET_CODE (use) == USE
5300 && GET_CODE (XEXP (use, 0)) == REG
5301 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5304 arg_mode = GET_MODE (XEXP (use, 0));
5305 regno = REGNO (XEXP (use, 0));
/* General argument registers %r23..%r26 map to ARGW3..ARGW0.  */
5306 if (regno >= 23 && regno <= 26)
5308 arg_regs[26 - regno] = "GR";
5309 if (arg_mode == DImode)
5310 arg_regs[25 - regno] = "GR";
/* Floating argument registers %fr4..%fr7 (regnos 32..39).  */
5312 else if (regno >= 32 && regno <= 39)
5314 if (arg_mode == SFmode)
5315 arg_regs[(regno - 32) / 2] = "FR";
5318 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5319 arg_regs[(regno - 34) / 2] = "FR";
5320 arg_regs[(regno - 34) / 2 + 1] = "FU";
5322 arg_regs[(regno - 34) / 2] = "FU";
5323 arg_regs[(regno - 34) / 2 + 1] = "FR";
5328 fputs ("\t.CALL ", asm_out_file);
5329 for (i = 0; i < 4; i++)
5334 fputc (',', asm_out_file);
5335 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5338 fputc ('\n', asm_out_file);
5341 /* Return the class of any secondary reload register that is needed to
5342 move IN into a register in class CLASS using mode MODE.
5344 Profiling has shown this routine and its descendants account for
5345 a significant amount of compile time (~7%). So it has been
5346 optimized to reduce redundant computations and eliminate useless
5349 It might be worthwhile to try and make this a leaf function too. */
/* NOTE(review): the `rtx in;` parameter declaration, braces, several
   `return` lines and parts of the trailing symbolic-operand checks
   are missing from this listing.  */
5352 secondary_reload_class (class, mode, in)
5353 enum reg_class class;
5354 enum machine_mode mode;
5357 int regno, is_symbolic;
5359 /* Trying to load a constant into a FP register during PIC code
5360 generation will require %r1 as a scratch register. */
5362 && GET_MODE_CLASS (mode) == MODE_INT
5363 && FP_REG_CLASS_P (class)
5364 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5367 /* Profiling showed the PA port spends about 1.3% of its compilation
5368 time in true_regnum from calls inside secondary_reload_class. */
5370 if (GET_CODE (in) == REG)
5373 if (regno >= FIRST_PSEUDO_REGISTER)
5374 regno = true_regnum (in);
5376 else if (GET_CODE (in) == SUBREG)
5377 regno = true_regnum (in);
5381 /* If we have something like (mem (mem (...)), we can safely assume the
5382 inner MEM will end up in a general register after reloading, so there's
5383 no need for a secondary reload. */
5384 if (GET_CODE (in) == MEM
5385 && GET_CODE (XEXP (in, 0)) == MEM)
5388 /* Handle out of range displacement for integer mode loads/stores of
5390 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5391 && GET_MODE_CLASS (mode) == MODE_INT
5392 && FP_REG_CLASS_P (class))
5393 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5394 return GENERAL_REGS;
5396 /* A SAR<->FP register copy requires a secondary register (GPR) as
5397 well as secondary memory. */
5398 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5399 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5400 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5401 return GENERAL_REGS;
5403 if (GET_CODE (in) == HIGH)
5406 /* Profiling has shown GCC spends about 2.6% of its compilation
5407 time in symbolic_operand from calls inside secondary_reload_class.
5409 We use an inline copy and only compute its return value once to avoid
5411 switch (GET_CODE (in))
5421 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5422 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5423 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
5433 && read_only_operand (in, VOIDmode))
5436 if (class != R1_REGS && is_symbolic)
/* Decide the padding direction (upward/downward/none) for an argument
   of MODE/TYPE — implements the FUNCTION_ARG_PADDING target macro.
   The 64-bit runtime left-justifies aggregates; the 32-bit runtime
   right-justifies small arguments.  NOTE(review): the return type,
   `tree type;` declaration, braces and the `return upward/downward/
   none` lines are missing from this listing.  */
5443 function_arg_padding (mode, type)
5444 enum machine_mode mode;
5448 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5450 /* Return none if justification is not required. */
5452 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5453 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5456 /* The directions set here are ignored when a BLKmode argument larger
5457 than a word is placed in a register. Different code is used for
5458 the stack and registers. This makes it difficult to have a
5459 consistent data representation for both the stack and registers.
5460 For both runtimes, the justification and padding for arguments on
5461 the stack and in registers should be identical. */
5463 /* The 64-bit runtime specifies left justification for aggregates. */
5466 /* The 32-bit runtime architecture specifies right justification.
5467 When the argument is passed on the stack, the argument is padded
5468 with garbage on the left. The HP compiler pads with zeros. */
5472 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5479 /* Do what is necessary for `va_start'. We look at the current function
5480 to determine if stdargs or varargs is used and fill in an initial
5481 va_list. A pointer to this constructor is returned. */
/* NOTE(review): braces and the TARGET_64BIT if/else lines are missing
   from this listing; the %r26..%r19 spill below appears to be the
   64-bit path and the move_block_from_reg path the 32-bit one —
   confirm against the full source.  */
5484 hppa_builtin_saveregs ()
5487 tree fntype = TREE_TYPE (current_function_decl);
/* argadj: skip one word when the last declared parameter is named
   (stdarg) rather than the function being old-style varargs.  */
5488 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5489 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5490 != void_type_node)))
5491 ? UNITS_PER_WORD : 0);
5494 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5496 offset = current_function_arg_offset_rtx;
5502 /* Adjust for varargs/stdarg differences. */
5504 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5506 offset = current_function_arg_offset_rtx;
5508 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5509 from the incoming arg pointer and growing to larger addresses. */
5510 for (i = 26, off = -64; i >= 19; i--, off += 8)
5511 emit_move_insn (gen_rtx_MEM (word_mode,
5512 plus_constant (arg_pointer_rtx, off)),
5513 gen_rtx_REG (word_mode, i));
5515 /* The incoming args pointer points just beyond the flushback area;
5516 normally this is not a serious concern. However, when we are doing
5517 varargs/stdargs we want to make the arg pointer point to the start
5518 of the incoming argument area. */
5519 emit_move_insn (virtual_incoming_args_rtx,
5520 plus_constant (arg_pointer_rtx, -64));
5522 /* Now return a pointer to the first anonymous argument. */
5523 return copy_to_reg (expand_binop (Pmode, add_optab,
5524 virtual_incoming_args_rtx,
5525 offset, 0, 0, OPTAB_LIB_WIDEN));
5528 /* Store general registers on the stack. */
5529 dest = gen_rtx_MEM (BLKmode,
5530 plus_constant (current_function_internal_arg_pointer,
5532 set_mem_alias_set (dest, get_varargs_alias_set ());
5533 set_mem_align (dest, BITS_PER_WORD);
5534 move_block_from_reg (23, dest, 4);
5536 /* move_block_from_reg will emit code to store the argument registers
5537 individually as scalar stores.
5539 However, other insns may later load from the same addresses for
5540 a structure load (passing a struct to a varargs routine).
5542 The alias code assumes that such aliasing can never happen, so we
5543 have to keep memory referencing insns from moving up beyond the
5544 last argument register store. So we emit a blockage insn here. */
5545 emit_insn (gen_blockage ());
5547 return copy_to_reg (expand_binop (Pmode, add_optab,
5548 current_function_internal_arg_pointer,
5549 offset, 0, 0, OPTAB_LIB_WIDEN));
5553 hppa_va_start (valist, nextarg)
5557 nextarg = expand_builtin_saveregs ();
5558 std_expand_builtin_va_start (valist, nextarg);
/* Implement `va_arg' for TYPE: build and expand the tree expression
   that fetches the next argument from VALIST.  The 64-bit path passes
   large arguments by value (args grow upward); the 32-bit path passes
   "large" (>8 bytes) and variable-sized arguments by reference (args
   grow downward).  NOTE(review): the parameter declarations, locals,
   braces and the TARGET_64BIT test lines are missing from this
   listing.  */
5562 hppa_va_arg (valist, type)
5565 HOST_WIDE_INT size = int_size_in_bytes (type);
5571 /* Every argument in PA64 is supposed to be passed by value
5572 (including large structs). However, as a GCC extension, we
5573 pass zero and variable sized arguments by reference. Empty
5574 structures are a GCC extension not supported by the HP
5575 compilers. Thus, passing them by reference isn't likely
5576 to conflict with the ABI. For variable sized arguments,
5577 GCC doesn't have the infrastructure to allocate these to
5580 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5582 if (size > UNITS_PER_WORD)
/* Round VALIST up to a 2-word (16-byte) boundary.  */
5584 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5585 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5586 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5587 build_int_2 (-2 * UNITS_PER_WORD, -1));
5588 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5589 TREE_SIDE_EFFECTS (t) = 1;
5590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5594 return std_expand_builtin_va_arg (valist, type);
5597 ptr = build_pointer_type (type);
5599 /* Args grow upward. */
5600 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5601 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5602 TREE_SIDE_EFFECTS (t) = 1;
/* Dereference the pointer-sized slot to get the by-reference arg.  */
5604 pptr = build_pointer_type (ptr);
5605 t = build1 (NOP_EXPR, pptr, t);
5606 TREE_SIDE_EFFECTS (t) = 1;
5608 t = build1 (INDIRECT_REF, ptr, t);
5609 TREE_SIDE_EFFECTS (t) = 1;
5612 else /* !TARGET_64BIT */
5614 ptr = build_pointer_type (type);
5616 /* "Large" and variable sized types are passed by reference. */
5617 if (size > 8 || size <= 0)
5619 /* Args grow downward. */
5620 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5621 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5622 TREE_SIDE_EFFECTS (t) = 1;
5624 pptr = build_pointer_type (ptr);
5625 t = build1 (NOP_EXPR, pptr, t);
5626 TREE_SIDE_EFFECTS (t) = 1;
5628 t = build1 (INDIRECT_REF, ptr, t);
5629 TREE_SIDE_EFFECTS (t) = 1;
/* Small argument passed by value: step VALIST back by SIZE ...  */
5633 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5634 build_int_2 (-size, -1));
5636 /* Copied from va-pa.h, but we probably don't need to align to
5637 word size, since we generate and preserve that invariant. */
5638 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5639 build_int_2 ((size > 4 ? -8 : -4), -1));
5641 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5642 TREE_SIDE_EFFECTS (t) = 1;
/* ... then adjust for right-justification within the slot.  */
5644 ofs = (8 - size) % 4;
5647 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
5648 build_int_2 (ofs, 0));
5649 TREE_SIDE_EFFECTS (t) = 1;
5652 t = build1 (NOP_EXPR, ptr, t);
5653 TREE_SIDE_EFFECTS (t) = 1;
5658 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5663 /* This routine handles all the normal conditional branch sequences we
5664 might need to generate. It handles compare immediate vs compare
5665 register, nullification of delay slots, varying length branches,
5666 negated branches, and all combinations of the above. It returns the
5667 output appropriate to emit the branch corresponding to all given
/* NOTE(review): the return type, `rtx insn;` declaration, locals,
   braces and several case/else lines are missing from this
   listing.  */
5671 output_cbranch (operands, nullify, length, negated, insn)
5673 int nullify, length, negated;
5676 static char buf[100];
5680 /* A conditional branch to the following instruction (eg the delay slot)
5681 is asking for a disaster. This can happen when not optimizing and
5682 when jump optimization fails.
5684 While it is usually safe to emit nothing, this can fail if the
5685 preceding instruction is a nullified branch with an empty delay
5686 slot and the same branch target as this branch. We could check
5687 for this but jump optimization should eliminate nop jumps. It
5688 is always safe to emit a nop. */
5689 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5692 /* If this is a long branch with its delay slot unfilled, set `nullify'
5693 as it can nullify the delay slot and save a nop. */
5694 if (length == 8 && dbr_sequence_length () == 0)
5697 /* If this is a short forward conditional branch which did not get
5698 its delay slot filled, the delay slot can still be nullified. */
5699 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5700 nullify = forward_branch_p (insn);
5702 /* A forward branch over a single nullified insn can be done with a
5703 comclr instruction. This avoids a single cycle penalty due to
5704 mis-predicted branch if we fall through (branch not taken). */
5706 && next_real_insn (insn) != 0
5707 && get_attr_length (next_real_insn (insn)) == 4
5708 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5714 /* All short conditional branches except backwards with an unfilled
5718 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5720 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5721 if (GET_MODE (operands[1]) == DImode)
5724 strcat (buf, "%B3");
5726 strcat (buf, "%S3");
5728 strcat (buf, " %2,%r1,%%r0");
5730 strcat (buf, ",n %2,%r1,%0");
5732 strcat (buf, " %2,%r1,%0");
5735 /* All long conditionals. Note a short backward branch with an
5736 unfilled delay slot is treated just like a long backward branch
5737 with an unfilled delay slot. */
5739 /* Handle weird backwards branch with a filled delay slot
5740 which is nullified. */
5741 if (dbr_sequence_length () != 0
5742 && ! forward_branch_p (insn)
5745 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5746 if (GET_MODE (operands[1]) == DImode)
5749 strcat (buf, "%S3");
5751 strcat (buf, "%B3");
5752 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5754 /* Handle short backwards branch with an unfilled delay slot.
5755 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5756 taken and untaken branches. */
5757 else if (dbr_sequence_length () == 0
5758 && ! forward_branch_p (insn)
5759 && INSN_ADDRESSES_SET_P ()
5760 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5761 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5763 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5764 if (GET_MODE (operands[1]) == DImode)
5767 strcat (buf, "%B3 %2,%r1,%0%#");
5769 strcat (buf, "%S3 %2,%r1,%0%#");
5773 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5774 if (GET_MODE (operands[1]) == DImode)
5777 strcat (buf, "%S3");
5779 strcat (buf, "%B3");
5781 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5783 strcat (buf, " %2,%r1,%%r0\n\tb %0");
/* Very long branch case: emit a reversed conditional skip around an
   unconditional long branch (output_lbranch).  */
5789 xoperands[0] = operands[0];
5790 xoperands[1] = operands[1];
5791 xoperands[2] = operands[2];
5792 xoperands[3] = operands[3];
5794 /* The reversed conditional branch must branch over one additional
5795 instruction if the delay slot is filled. If the delay slot
5796 is empty, the instruction after the reversed condition branch
5797 must be nullified. */
5798 nullify = dbr_sequence_length () == 0;
5799 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
5801 /* Create a reversed conditional branch which branches around
5802 the following insns. */
5803 if (GET_MODE (operands[1]) != DImode)
5809 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
5812 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
5818 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
5821 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
5830 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
5833 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
5839 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
5842 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
5846 output_asm_insn (buf, xoperands);
5847 return output_lbranch (operands[0], insn);
5855 /* This routine handles long unconditional branches that exceed the
5856 maximum range of a simple branch instruction. */
/* Strategy: spill %r1 to a frame-marker slot, compute the target
   address into %r1 (PIC or absolute), branch through %r1, and restore
   %r1 from the delay slot of the final branch.  NOTE(review): the
   return type, locals, braces and several if/else lines are missing
   from this listing.  */
5859 output_lbranch (dest, insn)
5864 xoperands[0] = dest;
5866 /* First, free up the delay slot. */
5867 if (dbr_sequence_length () != 0)
5869 /* We can't handle a jump in the delay slot. */
5870 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
/* Emit the delay-slot insn now, before the long branch sequence.  */
5873 final_scan_insn (NEXT_INSN (insn), asm_out_file,
5876 /* Now delete the delay insn. */
5877 PUT_CODE (NEXT_INSN (insn), NOTE);
5878 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5879 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5882 /* Output an insn to save %r1. The runtime documentation doesn't
5883 specify whether the "Clean Up" slot in the callers frame can
5884 be clobbered by the callee. It isn't copied by HP's builtin
5885 alloca, so this suggests that it can be clobbered if necessary.
5886 The "Static Link" location is copied by HP builtin alloca, so
5887 we avoid using it. Using the cleanup slot might be a problem
5888 if we have to interoperate with languages that pass cleanup
5889 information. However, it should be possible to handle these
5890 situations with GCC's asm feature.
5892 The "Current RP" slot is reserved for the called procedure, so
5893 we try to use it when we don't have a frame of our own. It's
5894 rather unlikely that we won't have a frame when we need to emit
5897 Really the way to go long term is a register scavenger; goto
5898 the target of the jump and find a register which we can use
5899 as a scratch to hold the value in %r1. Then, we wouldn't have
5900 to free up the delay slot or clobber a slot that may be needed
5901 for other purposes. */
/* 64-bit runtime: 8-byte saves with std/ldd.  */
5904 if (actual_fsize == 0 && !regs_ever_live[2])
5905 /* Use the return pointer slot in the frame marker. */
5906 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
5908 /* Use the slot at -40 in the frame marker since HP builtin
5909 alloca doesn't copy it. */
5910 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
/* 32-bit runtime: 4-byte saves with stw/ldw.  */
5914 if (actual_fsize == 0 && !regs_ever_live[2])
5915 /* Use the return pointer slot in the frame marker. */
5916 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
5918 /* Use the "Clean Up" slot in the frame marker. In GCC,
5919 the only other use of this location is for copying a
5920 floating point double argument from a floating-point
5921 register to two general registers. The copy is done
5922 as an "atomic" operation when outputting a call, so it
5923 won't interfere with our using the location here. */
5924 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
5927 if (TARGET_PORTABLE_RUNTIME)
5929 output_asm_insn ("ldil L'%0,%%r1", xoperands);
5930 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
5931 output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* PIC: materialize the PC, then add the label offset.  */
5935 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5936 if (TARGET_SOM || !TARGET_GAS)
5938 xoperands[1] = gen_label_rtx ();
5939 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
5940 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5941 CODE_LABEL_NUMBER (xoperands[1]));
5942 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
5946 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
5947 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
5949 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5952 /* Now output a very long branch to the original target. */
5953 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
5955 /* Now restore the value of %r1 in the delay slot. */
5958 if (actual_fsize == 0 && !regs_ever_live[2])
5959 return "ldd -16(%%r30),%%r1";
5961 return "ldd -40(%%r30),%%r1";
5965 if (actual_fsize == 0 && !regs_ever_live[2])
5966 return "ldw -20(%%r30),%%r1";
5968 return "ldw -12(%%r30),%%r1";
5972 /* This routine handles all the branch-on-bit conditional branch sequences we
5973 might need to generate. It handles nullification of delay slots,
5974 varying length branches, negated branches and all combinations of the
5975 above. it returns the appropriate output template to emit the branch. */
/* NOTE(review): the return type, `rtx insn; int which;` declarations,
   locals (useskip), braces and several else lines are missing from
   this listing.  */
5978 output_bb (operands, nullify, length, negated, insn, which)
5979 rtx *operands ATTRIBUTE_UNUSED;
5980 int nullify, length, negated;
5984 static char buf[100];
5987 /* A conditional branch to the following instruction (eg the delay slot) is
5988 asking for a disaster. I do not think this can happen as this pattern
5989 is only used when optimizing; jump optimization should eliminate the
5990 jump. But be prepared just in case. */
5992 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5995 /* If this is a long branch with its delay slot unfilled, set `nullify'
5996 as it can nullify the delay slot and save a nop. */
5997 if (length == 8 && dbr_sequence_length () == 0)
6000 /* If this is a short forward conditional branch which did not get
6001 its delay slot filled, the delay slot can still be nullified. */
6002 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6003 nullify = forward_branch_p (insn);
6005 /* A forward branch over a single nullified insn can be done with a
6006 extrs instruction. This avoids a single cycle penalty due to
6007 mis-predicted branch if we fall through (branch not taken). */
6010 && next_real_insn (insn) != 0
6011 && get_attr_length (next_real_insn (insn)) == 4
6012 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6019 /* All short conditional branches except backwards with an unfilled
6023 strcpy (buf, "{extrs,|extrw,s,}")
6025 strcpy (buf, "bb,");
6026 if (useskip && GET_MODE (operands[0]) == DImode)
6027 strcpy (buf, "extrd,s,*");
6028 else if (GET_MODE (operands[0]) == DImode)
6029 strcpy (buf, "bb,*");
/* `which' selects branch-on-clear vs branch-on-set; negation flips
   the sense.  */
6030 if ((which == 0 && negated)
6031 || (which == 1 && ! negated))
6036 strcat (buf, " %0,%1,1,%%r0");
6037 else if (nullify && negated)
6038 strcat (buf, ",n %0,%1,%3");
6039 else if (nullify && ! negated)
6040 strcat (buf, ",n %0,%1,%2");
6041 else if (! nullify && negated)
6042 strcat (buf, "%0,%1,%3");
6043 else if (! nullify && ! negated)
6044 strcat (buf, " %0,%1,%2");
6047 /* All long conditionals. Note a short backward branch with an
6048 unfilled delay slot is treated just like a long backward branch
6049 with an unfilled delay slot. */
6051 /* Handle weird backwards branch with a filled delay slot
6052 which is nullified. */
6053 if (dbr_sequence_length () != 0
6054 && ! forward_branch_p (insn)
6057 strcpy (buf, "bb,");
6058 if (GET_MODE (operands[0]) == DImode)
6060 if ((which == 0 && negated)
6061 || (which == 1 && ! negated))
6066 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6068 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6070 /* Handle short backwards branch with an unfilled delay slot.
6071 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6072 taken and untaken branches. */
6073 else if (dbr_sequence_length () == 0
6074 && ! forward_branch_p (insn)
6075 && INSN_ADDRESSES_SET_P ()
6076 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6077 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6079 strcpy (buf, "bb,");
6080 if (GET_MODE (operands[0]) == DImode)
6082 if ((which == 0 && negated)
6083 || (which == 1 && ! negated))
6088 strcat (buf, " %0,%1,%3%#");
6090 strcat (buf, " %0,%1,%2%#");
6094 strcpy (buf, "{extrs,|extrw,s,}");
6095 if (GET_MODE (operands[0]) == DImode)
6096 strcpy (buf, "extrd,s,*");
6097 if ((which == 0 && negated)
6098 || (which == 1 && ! negated))
6102 if (nullify && negated)
6103 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6104 else if (nullify && ! negated)
6105 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6107 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6109 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6119 /* This routine handles all the branch-on-variable-bit conditional branch
6120 sequences we might need to generate. It handles nullification of delay
6121 slots, varying length branches, negated branches and all combinations
6122 of the above. it returns the appropriate output template to emit the
/* NOTE(review): the return type, `rtx insn; int which;` declarations,
   locals (useskip), braces and several else lines are missing from
   this listing.  */
6126 output_bvb (operands, nullify, length, negated, insn, which)
6127 rtx *operands ATTRIBUTE_UNUSED;
6128 int nullify, length, negated;
6132 static char buf[100];
6135 /* A conditional branch to the following instruction (eg the delay slot) is
6136 asking for a disaster. I do not think this can happen as this pattern
6137 is only used when optimizing; jump optimization should eliminate the
6138 jump. But be prepared just in case. */
6140 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6143 /* If this is a long branch with its delay slot unfilled, set `nullify'
6144 as it can nullify the delay slot and save a nop. */
6145 if (length == 8 && dbr_sequence_length () == 0)
6148 /* If this is a short forward conditional branch which did not get
6149 its delay slot filled, the delay slot can still be nullified. */
6150 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6151 nullify = forward_branch_p (insn);
6153 /* A forward branch over a single nullified insn can be done with a
6154 extrs instruction. This avoids a single cycle penalty due to
6155 mis-predicted branch if we fall through (branch not taken). */
6158 && next_real_insn (insn) != 0
6159 && get_attr_length (next_real_insn (insn)) == 4
6160 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6167 /* All short conditional branches except backwards with an unfilled
6171 strcpy (buf, "{vextrs,|extrw,s,}");
6173 strcpy (buf, "{bvb,|bb,}");
6174 if (useskip && GET_MODE (operands[0]) == DImode)
/* NOTE(review): the trailing '}' inside this template looks like a
   typo — compare the "extrd,s,*" form used at 6244 below; verify
   against the assembler output.  */
6175 strcpy (buf, "extrd,s,*}");
6176 else if (GET_MODE (operands[0]) == DImode)
6177 strcpy (buf, "bb,*");
6178 if ((which == 0 && negated)
6179 || (which == 1 && ! negated))
6184 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6185 else if (nullify && negated)
6186 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6187 else if (nullify && ! negated)
6188 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6189 else if (! nullify && negated)
6190 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6191 else if (! nullify && ! negated)
6192 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6195 /* All long conditionals. Note a short backward branch with an
6196 unfilled delay slot is treated just like a long backward branch
6197 with an unfilled delay slot. */
6199 /* Handle weird backwards branch with a filled delay slot
6200 which is nullified. */
6201 if (dbr_sequence_length () != 0
6202 && ! forward_branch_p (insn)
6205 strcpy (buf, "{bvb,|bb,}");
6206 if (GET_MODE (operands[0]) == DImode)
6208 if ((which == 0 && negated)
6209 || (which == 1 && ! negated))
6214 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6216 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6218 /* Handle short backwards branch with an unfilled delay slot.
6219 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6220 taken and untaken branches. */
6221 else if (dbr_sequence_length () == 0
6222 && ! forward_branch_p (insn)
6223 && INSN_ADDRESSES_SET_P ()
6224 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6225 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6227 strcpy (buf, "{bvb,|bb,}");
6228 if (GET_MODE (operands[0]) == DImode)
6230 if ((which == 0 && negated)
6231 || (which == 1 && ! negated))
6236 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6238 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6242 strcpy (buf, "{vextrs,|extrw,s,}");
6243 if (GET_MODE (operands[0]) == DImode)
6244 strcpy (buf, "extrd,s,*");
6245 if ((which == 0 && negated)
6246 || (which == 1 && ! negated))
6250 if (nullify && negated)
6251 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6252 else if (nullify && ! negated)
6253 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6255 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6257 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
/* NOTE(review): the embedded source line numbers below skip, so lines
   (braces, declarations, `operands' lists) have been elided from this
   extraction; code is kept byte-identical.  */
6267 /* Return the output template for emitting a dbra type insn.
6269    Note it may perform some output operations on its own before
6270    returning the final output string. */
6272 output_dbra (operands, insn, which_alternative)
6275      int which_alternative;
/* Degenerate case: the branch targets the very next real insn, so emit
   only the counter update and no branch.  */
6278   /* A conditional branch to the following instruction (eg the delay slot) is
6279      asking for a disaster.  Be prepared! */
6281   if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6283       if (which_alternative == 0)
6284 	return "ldo %1(%0),%0";
6285       else if (which_alternative == 1)
/* Counter lives in an FP register: bounce it through -16(%r30) to a GR,
   increment there, store back, and reload the FP register.  */
6287 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6288 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
6289 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6290 	  return "{fldws|fldw} -16(%%r30),%0";
6294 	  output_asm_insn ("ldw %0,%4", operands);
6295 	  return "ldo %1(%4),%4\n\tstw %4,%0";
/* Normal case: counter in a general register.  Template choice depends on
   branch length, direction, and whether the delay slot can be nullified.  */
6299   if (which_alternative == 0)
6301       int nullify = INSN_ANNULLED_BRANCH_P (insn);
6302       int length = get_attr_length (insn);
6304       /* If this is a long branch with its delay slot unfilled, set `nullify'
6305 	 as it can nullify the delay slot and save a nop. */
6306       if (length == 8 && dbr_sequence_length () == 0)
6309       /* If this is a short forward conditional branch which did not get
6310 	 its delay slot filled, the delay slot can still be nullified. */
6311       if (! nullify && length == 4 && dbr_sequence_length () == 0)
6312 	nullify = forward_branch_p (insn);
6314       /* Handle short versions first. */
6315       if (length == 4 && nullify)
6316 	return "addib,%C2,n %1,%0,%3";
6317       else if (length == 4 && ! nullify)
6318 	return "addib,%C2 %1,%0,%3";
6319       else if (length == 8)
6321 	  /* Handle weird backwards branch with a fulled delay slot
6322 	     which is nullified. */
6323 	  if (dbr_sequence_length () != 0
6324 	      && ! forward_branch_p (insn)
6326 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
6327 	  /* Handle short backwards branch with an unfilled delay slot.
6328 	     Using a addb;nop rather than addi;bl saves 1 cycle for both
6329 	     taken and untaken branches. */
6330 	  else if (dbr_sequence_length () == 0
6331 		   && ! forward_branch_p (insn)
6332 		   && INSN_ADDRESSES_SET_P ()
6333 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6334 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6335 	    return "addib,%C2 %1,%0,%3%#";
6337 	  /* Handle normal cases. */
6339 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
6341 	    return "addi,%N2 %1,%0,%0\n\tb %3";
6346   /* Deal with gross reload from FP register case. */
6347   else if (which_alternative == 1)
6349       /* Move loop counter from FP register to MEM then into a GR,
6350 	 increment the GR, store the GR into MEM, and finally reload
6351 	 the FP register from MEM from within the branch's delay slot. */
6352       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6354       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6355       if (get_attr_length (insn) == 24)
6356 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6358 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6360   /* Deal with gross reload from memory case. */
6363       /* Reload loop counter from memory, the store back to memory
6364 	 happens in the branch's delay slot. */
6365       output_asm_insn ("ldw %0,%4", operands);
6366       if (get_attr_length (insn) == 12)
6367 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6369 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
/* NOTE(review): lines are elided from this extraction (embedded numbers
   skip); code kept byte-identical.  Header comment says "dbra" but this
   is the movb variant (move-and-branch).  */
6373 /* Return the output template for emitting a dbra type insn.
6375    Note it may perform some output operations on its own before
6376    returning the final output string. */
6378 output_movb (operands, insn, which_alternative, reverse_comparison)
6381      int which_alternative;
6382      int reverse_comparison;
/* Degenerate case: branch to the immediately following insn; emit only
   the data movement.  */
6385   /* A conditional branch to the following instruction (eg the delay slot) is
6386      asking for a disaster.  Be prepared! */
6388   if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6390       if (which_alternative == 0)
6391 	return "copy %1,%0";
6392       else if (which_alternative == 1)
6394 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
6395 	  return "{fldws|fldw} -16(%%r30),%0";
6397       else if (which_alternative == 2)
/* Second pattern variant: flip the comparison code in-place on the
   operand before selecting a template.  */
6403   /* Support the second variant.  */
6404   if (reverse_comparison)
6405     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6407   if (which_alternative == 0)
6409       int nullify = INSN_ANNULLED_BRANCH_P (insn);
6410       int length = get_attr_length (insn);
6412       /* If this is a long branch with its delay slot unfilled, set `nullify'
6413 	 as it can nullify the delay slot and save a nop. */
6414       if (length == 8 && dbr_sequence_length () == 0)
6417       /* If this is a short forward conditional branch which did not get
6418 	 its delay slot filled, the delay slot can still be nullified. */
6419       if (! nullify && length == 4 && dbr_sequence_length () == 0)
6420 	nullify = forward_branch_p (insn);
6422       /* Handle short versions first. */
6423       if (length == 4 && nullify)
6424 	return "movb,%C2,n %1,%0,%3";
6425       else if (length == 4 && ! nullify)
6426 	return "movb,%C2 %1,%0,%3";
6427       else if (length == 8)
6429 	  /* Handle weird backwards branch with a filled delay slot
6430 	     which is nullified. */
6431 	  if (dbr_sequence_length () != 0
6432 	      && ! forward_branch_p (insn)
6434 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
6436 	  /* Handle short backwards branch with an unfilled delay slot.
6437 	     Using a movb;nop rather than or;bl saves 1 cycle for both
6438 	     taken and untaken branches. */
6439 	  else if (dbr_sequence_length () == 0
6440 		   && ! forward_branch_p (insn)
6441 		   && INSN_ADDRESSES_SET_P ()
6442 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6443 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6444 	    return "movb,%C2 %1,%0,%3%#";
6445 	  /* Handle normal cases. */
6447 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6449 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
6454   /* Deal with gross reload from FP register case. */
6455   else if (which_alternative == 1)
6457       /* Move loop counter from FP register to MEM then into a GR,
6458 	 increment the GR, store the GR into MEM, and finally reload
6459 	 the FP register from MEM from within the branch's delay slot. */
6460       output_asm_insn ("stw %1,-16(%%r30)", operands);
6461       if (get_attr_length (insn) == 12)
6462 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6464 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6466   /* Deal with gross reload from memory case. */
6467   else if (which_alternative == 2)
6469       /* Reload loop counter from memory, the store back to memory
6470 	 happens in the branch's delay slot. */
6471       if (get_attr_length (insn) == 8)
6472 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6474 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6476   /* Handle SAR as a destination. */
6479       if (get_attr_length (insn) == 8)
6480 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6482 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
/* NOTE(review): the function definition line for copy_fp_args was elided
   from this extraction; the loop body below is kept byte-identical.  */
6486 /* Copy any FP arguments in INSN into integer registers.  */
/* Walk the USE list attached to the call insn to find argument
   registers.  */
6494   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6496       int arg_mode, regno;
6497       rtx use = XEXP (link, 0);
/* Skip anything that is not a USE of an argument hard register.  */
6499       if (! (GET_CODE (use) == USE
6500 	  && GET_CODE (XEXP (use, 0)) == REG
6501 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6504       arg_mode = GET_MODE (XEXP (use, 0));
6505       regno = REGNO (XEXP (use, 0));
6507       /* Is it a floating point register? */
6508       if (regno >= 32 && regno <= 39)
6510 	  /* Copy the FP register into an integer register via memory. */
6511 	  if (arg_mode == SFmode)
/* SFmode: one fstw/ldw pair through the -16(%sr0,%r30) slot; target GR
   is derived from the FP regno (26, 25, ... per the PA calling convention).  */
6513 	      xoperands[0] = XEXP (use, 0);
6514 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6515 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6516 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* DFmode (presumably — the `else' line is elided): fstd then two ldw's
   for the high and low words.  */
6520 	      xoperands[0] = XEXP (use, 0);
6521 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6522 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6523 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6524 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* NOTE(review): this function is truncated in the extraction (the length
   accumulation and return are elided); code kept byte-identical.  */
6530 /* Compute length of the FP argument copy sequence for INSN.  */
6532 length_fp_args (insn)
/* Same USE-list walk as copy_fp_args, but only measuring.  */
6538   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6540       int arg_mode, regno;
6541       rtx use = XEXP (link, 0);
6543       if (! (GET_CODE (use) == USE
6544 	  && GET_CODE (XEXP (use, 0)) == REG
6545 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6548       arg_mode = GET_MODE (XEXP (use, 0));
6549       regno = REGNO (XEXP (use, 0));
6551       /* Is it a floating point register? */
6552       if (regno >= 32 && regno <= 39)
6554 	  if (arg_mode == SFmode)
/* NOTE(review): several branches/returns of this function were elided
   from the extraction; code kept byte-identical.  */
6564 /* Return the attribute length for the millicode call instruction INSN.
6565    The length must match the code generated by output_millicode_call.
6566    We include the delay slot in the returned length as it is better to
6567    over estimate the length than to under estimate it. */
6570 attr_length_millicode_call (insn)
/* `distance' starts at -1 (== ULONG_MAX) meaning "unknown/far"; when insn
   addresses are available it is refined to the reference distance.  */
6573   unsigned long distance = -1;
6574   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6576   if (INSN_ADDRESSES_SET_P ())
6578       distance = (total + insn_current_reference_address (insn));
6579       if (distance < total)
/* Threshold checks below select progressively longer call sequences;
   the returned byte counts are elided here.  */
6585       if (!TARGET_LONG_CALLS && distance < 7600000)
6590   else if (TARGET_PORTABLE_RUNTIME)
6594       if (!TARGET_LONG_CALLS && distance < 240000)
6597       if (TARGET_LONG_ABS_CALL && !flag_pic)
/* NOTE(review): lines (braces, declarations, some else-arms) are elided
   from this extraction; code kept byte-identical.  */
6604 /* INSN is a function call.  It may have an unconditional jump
6607    CALL_DEST is the routine we are calling.  */
6610 output_millicode_call (insn, call_dest)
6614   int attr_length = get_attr_length (insn);
6615   int seq_length = dbr_sequence_length ();
6620   xoperands[0] = call_dest;
/* Millicode return pointer: %r2 on 64-bit, %r31 otherwise.  */
6621   xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6623   /* Handle the common case where we are sure that the branch will
6624      reach the beginning of the $CODE$ subspace.  The within reach
6625      form of the $$sh_func_adrs call has a length of 28.  Because
6626      it has an attribute type of multi, it never has a nonzero
6627      sequence length.  The length of the $$sh_func_adrs is the same
6628      as certain out of reach PIC calls to other routines.  */
6629   if (!TARGET_LONG_CALLS
6630       && ((seq_length == 0
6631 	   && (attr_length == 12
6632 	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6633 	  || (seq_length != 0 && attr_length == 8)))
6635       output_asm_insn ("{bl|b,l} %0,%2", xoperands);
/* Out-of-reach cases follow (64-bit pc-relative, portable runtime,
   absolute, and PIC sequences).  */
6641 	  /* It might seem that one insn could be saved by accessing
6642 	     the millicode function using the linkage table.  However,
6643 	     this doesn't work in shared libraries and other dynamically
6644 	     loaded objects.  Using a pc-relative sequence also avoids
6645 	     problems related to the implicit use of the gp register.  */
6646 	  output_asm_insn ("b,l .+8,%%r1", xoperands);
6650 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6651 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6655 	      xoperands[1] = gen_label_rtx ();
6656 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6657 	      (*targetm.asm_out.internal_label) (asm_out_file, "L",
6658 					 CODE_LABEL_NUMBER (xoperands[1]));
6659 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6662 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6664       else if (TARGET_PORTABLE_RUNTIME)
6666 	  /* Pure portable runtime doesn't allow be/ble; we also don't
6667 	     have PIC support in the assembler/linker, so this sequence
6670 	  /* Get the address of our target into %r1.  */
6671 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
6672 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6674 	  /* Get our return address into %r31.  */
6675 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6676 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6678 	  /* Jump to our target address in %r1.  */
6679 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
6683 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
6685 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6687 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6691 	      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6692 	      output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6694 	      if (TARGET_SOM || !TARGET_GAS)
6696 		  /* The HP assembler can generate relocations for the
6697 		     difference of two symbols.  GAS can do this for a
6698 		     millicode symbol but not an arbitrary external
6699 		     symbol when generating SOM output.  */
6700 		  xoperands[1] = gen_label_rtx ();
6701 		  (*targetm.asm_out.internal_label) (asm_out_file, "L",
6702 					     CODE_LABEL_NUMBER (xoperands[1]));
6703 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6704 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6708 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6709 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6713 	      /* Jump to our target address in %r1.  */
6714 	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* Emit a nop if the delay slot was not filled by the scheduler.  */
6718   if (seq_length == 0)
6719     output_asm_insn ("nop", xoperands);
6721   /* We are done if there isn't a jump in the delay slot.  */
6722   if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6725   /* This call has an unconditional jump in its delay slot.  */
6726   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6728   /* See if the return address can be adjusted.  Use the containing
6729      sequence insn's address.  */
6730   if (INSN_ADDRESSES_SET_P ())
6732       seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6733       distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6734 		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
/* If the jump target is within 14 bits, fold the jump into the return
   address adjustment; otherwise fall back to an explicit branch.  */
6736       if (VAL_14_BITS_P (distance))
6738 	  xoperands[1] = gen_label_rtx ();
6739 	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6740 	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
6741 					     CODE_LABEL_NUMBER (xoperands[1]));
6744 	/* ??? This branch may not reach its target.  */
6745 	output_asm_insn ("nop\n\tb,n %0", xoperands);
6748       /* ??? This branch may not reach its target.  */
6749       output_asm_insn ("nop\n\tb,n %0", xoperands);
6751   /* Delete the jump.  */
6752   PUT_CODE (NEXT_INSN (insn), NOTE);
6753   NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6754   NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
/* NOTE(review): return values and several branches were elided from this
   extraction; code kept byte-identical.  */
6759 /* Return the attribute length of the call instruction INSN.  The SIBCALL
6760    flag indicates whether INSN is a regular call or a sibling call.  The
6761    length returned must be longer than the code generated by output_call.
6762    When the target supports jumps in the delay slot, we need an extra
6763    four bytes to handle the situation where the jump can't reach its
6767 attr_length_call (insn, sibcall)
/* -1 == ULONG_MAX: "distance unknown", forcing the long sequences.  */
6771   unsigned long distance = -1;
6772   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6774   if (INSN_ADDRESSES_SET_P ())
6776       distance = (total + insn_current_reference_address (insn));
6777       if (distance < total)
/* Short-reach cases, then progressively longer sequences.  */
6783       if (!TARGET_LONG_CALLS
6784 	  && ((!sibcall && distance < 7600000) || distance < 240000))
6787       return (sibcall ? 28 : 24);
6791       if (!TARGET_LONG_CALLS
6792 	  && ((TARGET_PA_20 && !sibcall && distance < 7600000)
6793 	      || distance < 240000))
6796       if (TARGET_LONG_ABS_CALL && !flag_pic)
6799       if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6800 	  || (TARGET_GAS && TARGET_LONG_PIC_PCREL_CALL))
/* Indirect/plabel sequences must also copy FP args through GRs.  */
6812 	    length += length_fp_args (insn);
6823 	  if (!TARGET_NO_SPACE_REGS)
/* NOTE(review): lines (braces, declarations, some else-arms and
   conditions) are elided from this extraction; code kept byte-identical.  */
6834 /* INSN is a function call.  It may have an unconditional jump
6837    CALL_DEST is the routine we are calling.  */
6840 output_call (insn, call_dest, sibcall)
6845   int delay_insn_deleted = 0;
6846   int delay_slot_filled = 0;
6847   int seq_length = dbr_sequence_length ();
6848   tree call_decl = SYMBOL_REF_DECL (call_dest);
/* Local calls (static functions) can always use short direct sequences.  */
6849   int local_call = call_decl && !TREE_PUBLIC (call_decl);
6852   xoperands[0] = call_dest;
6854   /* Handle the common case where we're sure that the branch will reach
6855      the beginning of the "$CODE$" subspace.  This is the beginning of
6856      the current function if we are in a named section.  */
6857   if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
6859       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6860       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
6864       if (TARGET_64BIT && !local_call)
6866 	  /* ??? As far as I can tell, the HP linker doesn't support the
6867 	     long pc-relative sequence described in the 64-bit runtime
6868 	     architecture.  So, we use a slightly longer indirect call.  */
6869 	  struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6871 	  xoperands[0] = p->internal_label;
6872 	  xoperands[1] = gen_label_rtx ();
6874 	  /* If this isn't a sibcall, we put the load of %r27 into the
6875 	     delay slot.  We can't do this in a sibcall as we don't
6876 	     have a second call-clobbered scratch register available.  */
6878 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6881 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
6884 	      /* Now delete the delay insn.  */
6885 	      PUT_CODE (NEXT_INSN (insn), NOTE);
6886 	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6887 	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6888 	      delay_insn_deleted = 1;
/* Load the function descriptor through the linkage table (%r27 is the
   64-bit global pointer).  */
6891 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
6892 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
6893 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
6897 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6898 	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
6899 	      output_asm_insn ("bve (%%r1)", xoperands);
6903 	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
6904 	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
6905 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6906 	      delay_slot_filled = 1;
6911 	  int indirect_call = 0;
6913 	  /* Emit a long call.  There are several different sequences
6914 	     of increasing length and complexity.  In most cases,
6915 	     they don't allow an instruction in the delay slot.  */
6916 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6917 	      && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6918 	      && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
6923 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6925 	      && (!TARGET_PA_20 || indirect_call))
6927 	      /* A non-jump insn in the delay slot.  By definition we can
6928 		 emit this insn before the call (and in fact before argument
6930 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6932 	      /* Now delete the delay insn.  */
6933 	      PUT_CODE (NEXT_INSN (insn), NOTE);
6934 	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6935 	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6936 	      delay_insn_deleted = 1;
6939 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6941 	      /* This is the best sequence for making long calls in
6942 		 non-pic code.  Unfortunately, GNU ld doesn't provide
6943 		 the stub needed for external calls, and GAS's support
6944 		 for this with the SOM linker is buggy.  It is safe
6945 		 to use this for local calls.  */
6946 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
6948 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
6952 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
6955 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6957 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
6958 		  delay_slot_filled = 1;
6963 	      if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6964 		  || (TARGET_64BIT && !TARGET_GAS))
6966 		  /* The HP assembler and linker can handle relocations
6967 		     for the difference of two symbols.  GAS and the HP
6968 		     linker can't do this when one of the symbols is
6970 		  xoperands[1] = gen_label_rtx ();
6971 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6972 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6973 		  (*targetm.asm_out.internal_label) (asm_out_file, "L",
6974 					     CODE_LABEL_NUMBER (xoperands[1]));
6975 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6977 	      else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
6979 		  /*  GAS currently can't generate the relocations that
6980 		      are needed for the SOM linker under HP-UX using this
6981 		      sequence.  The GNU linker doesn't generate the stubs
6982 		      that are needed for external calls on TARGET_ELF32
6983 		      with this sequence.  For now, we have to use a
6984 		      longer plabel sequence when using GAS.  */
6985 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6986 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
6988 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
6993 		  /* Emit a long plabel-based call sequence.  This is
6994 		     essentially an inline implementation of $$dyncall.
6995 		     We don't actually try to call $$dyncall as this is
6996 		     as difficult as calling the function itself.  */
6997 		  struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6999 		  xoperands[0] = p->internal_label;
7000 		  xoperands[1] = gen_label_rtx ();
7002 		  /* Since the call is indirect, FP arguments in registers
7003 		     need to be copied to the general registers.  Then, the
7004 		     argument relocation stub will copy them back.  */
7006 		    copy_fp_args (insn);
7010 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
7011 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7012 		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7016 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
7018 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
/* Plabel bit 30 distinguishes shared-library descriptors; if set, strip
   the low bits and load the real target and new gp.  */
7022 		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7023 		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7024 		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7025 		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7027 		  if (!sibcall && !TARGET_PA_20)
7029 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7030 		      if (TARGET_NO_SPACE_REGS)
7031 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7033 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7040 		      output_asm_insn ("bve (%%r1)", xoperands);
7045 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7046 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7047 			  delay_slot_filled = 1;
7050 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7055 		      if (!TARGET_NO_SPACE_REGS)
7056 			output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7061 			  if (TARGET_NO_SPACE_REGS)
7062 			    output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7064 			    output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7068 			  if (TARGET_NO_SPACE_REGS)
7069 			    output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7071 			    output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7074 			    output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7076 			    output_asm_insn ("copy %%r31,%%r2", xoperands);
7077 			  delay_slot_filled = 1;
7084   if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7085     output_asm_insn ("nop", xoperands);
7087   /* We are done if there isn't a jump in the delay slot.  */
7089       || delay_insn_deleted
7090       || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7093   /* A sibcall should never have a branch in the delay slot.  */
7097   /* This call has an unconditional jump in its delay slot.  */
7098   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7100   if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7102       /* See if the return address can be adjusted.  Use the containing
7103          sequence insn's address.  */
7104       rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7105       int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7106 		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7108       if (VAL_14_BITS_P (distance))
7110 	  xoperands[1] = gen_label_rtx ();
7111 	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7112 	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
7113 					     CODE_LABEL_NUMBER (xoperands[1]));
7116 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7119     output_asm_insn ("b,n %0", xoperands);
7121   /* Delete the jump.  */
7122   PUT_CODE (NEXT_INSN (insn), NOTE);
7123   NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7124   NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
/* NOTE(review): return values were elided from this extraction; code
   kept byte-identical.  */
7129 /* Return the attribute length of the indirect call instruction INSN.
7130    The length must match the code generated by output_indirect call.
7131    The returned length includes the delay slot.  Currently, the delay
7132    slot of an indirect call sequence is not exposed and it is used by
7133    the sequence itself.  */
7136 attr_length_indirect_call (insn)
/* -1 == ULONG_MAX: "distance unknown", forcing the long sequences.  */
7139   unsigned long distance = -1;
7140   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7142   if (INSN_ADDRESSES_SET_P ())
7144       distance = (total + insn_current_reference_address (insn));
7145       if (distance < total)
7152   if (TARGET_FAST_INDIRECT_CALLS
7153       || (!TARGET_PORTABLE_RUNTIME
7154 	  && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7160   if (TARGET_PORTABLE_RUNTIME)
7163   /* Out of reach, can use ble.  */
/* NOTE(review): the declarations and some braces of this function were
   elided from the extraction; code kept byte-identical.
   Emits the assembly for an indirect call; the length returned by
   attr_length_indirect_call selects which sequence is used.  */
7168 output_indirect_call (insn, call_dest)
/* 64-bit: call through the function descriptor (16 = entry, 24 = gp).  */
7176       xoperands[0] = call_dest;
7177       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7178       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7182   /* First the special case for kernels, level 0 systems, etc.  */
7183   if (TARGET_FAST_INDIRECT_CALLS)
7184     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7186   /* Now the normal case -- we can reach $$dyncall directly or
7187      we're sure that we can get there via a long-branch stub.
7189      No need to check target flags as the length uniquely identifies
7190      the remaining cases.  */
7191   if (attr_length_indirect_call (insn) == 8)
7192     return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7194   /* Long millicode call, but we are not generating PIC or portable runtime
7196   if (attr_length_indirect_call (insn) == 12)
7197     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7199   /* Long millicode call for portable runtime.  */
7200   if (attr_length_indirect_call (insn) == 20)
7201     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7203   /* We need a long PIC call to $$dyncall.  */
7204   xoperands[0] = NULL_RTX;
7205   output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7206   if (TARGET_SOM || !TARGET_GAS)
/* SOM / HP as: use a symbol-difference relocation against a local label.  */
7208       xoperands[0] = gen_label_rtx ();
7209       output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7210       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7211 					 CODE_LABEL_NUMBER (xoperands[0]));
7212       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7216       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7217       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7220   output_asm_insn ("blr %%r0,%%r2", xoperands);
7221   output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
/* NOTE(review): this function is truncated in the extraction (body after
   the REG_NORETURN check is elided); code kept byte-identical.  */
7225 /* Return the total length of the save and restore instructions needed for
7226    the data linkage table pointer (i.e., the PIC register) across the call
7227    instruction INSN.  No-return calls do not require a save and restore.
7228    In addition, we may be able to avoid the save and restore for calls
7229    within the same translation unit.  */
7232 attr_length_save_restore_dltp (insn)
7235   if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
/* NOTE(review): declaration, brace, and strcpy lines were elided from
   this extraction; code kept byte-identical.  Presumably prefixes the
   symbol name (e.g. with '@') and re-interns it — confirm against full
   source.  */
7241 /* In HPUX 8.0's shared library scheme, special relocations are needed
7242    for function labels if they might be passed to a function
7243    in a shared library (because shared libraries don't live in code
7244    space), and special magic is needed to construct their address. */
7247 hppa_encode_label (sym)
7250   const char *str = XSTR (sym, 0);
/* +1 for the marker character added to the front of the name.  */
7251   int len = strlen (str) + 1;
7254   p = newstr = alloca (len + 1);
7258   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* NOTE(review): parameter declarations and braces were elided; code kept
   byte-identical.  Target hook: on the first encoding pass, flag
   text-space symbols and encode function labels for shared-library
   addressing.  */
7262 pa_encode_section_info (decl, rtl, first)
7267   if (first && TEXT_SPACE_P (decl))
7269       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7270       if (TREE_CODE (decl) == FUNCTION_DECL)
7271 	hppa_encode_label (XEXP (rtl, 0));
/* NOTE(review): declaration lines and the return were elided; code kept
   byte-identical.  Skips the '@' and '*' markers added by the encoding
   step to recover the plain symbol name.  */
7275 /* This is sort of inverse to pa_encode_section_info.  */
7278 pa_strip_name_encoding (str)
7281   str += (*str == '@');
7282   str += (*str == '*');
/* NOTE(review): parameter declaration and braces were elided; code kept
   byte-identical.  Predicate: true iff OP is a SYMBOL_REF whose encoded
   name marks it as a function label.  MODE is unused.  */
7287 function_label_operand (op, mode)
7289      enum machine_mode mode ATTRIBUTE_UNUSED;
7291   return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
/* NOTE(review): the parameter declaration, braces, and the statement
   stripping the CONST wrapper were elided; code kept byte-identical.  */
7294 /* Returns 1 if OP is a function label involved in a simple addition
7295    with a constant.  Used to keep certain patterns from matching
7296    during instruction combination. */
7298 is_function_label_plus_const (op)
7301   /* Strip off any CONST. */
7302   if (GET_CODE (op) == CONST)
/* Match (plus (function-label) (const_int ...)).  */
7305   return (GET_CODE (op) == PLUS
7306 	  && function_label_operand (XEXP (op, 0), Pmode)
7307 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
/* NOTE(review): many lines (braces, else-arms, some fprintf arguments,
   nbytes accounting) are elided from this extraction; code kept
   byte-identical.  Emits an adjust-`this'-and-jump thunk for C++
   covariant/multiple inheritance dispatch.  */
7310 /* Output assembly code for a thunk to FUNCTION.  */
7313 pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
7316      HOST_WIDE_INT delta;
7317      HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED;
7320   const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7321   const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
/* val_14: DELTA fits in a 14-bit immediate, so it can ride in the branch
   delay slot (`ldo delta(%r26),%r26'); otherwise addil/ldo pairs are
   needed.  */
7322   int val_14 = VAL_14_BITS_P (delta);
7324   static unsigned int current_thunk_number;
7327   ASM_OUTPUT_LABEL (file, tname);
7328   fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7330   fname = (*targetm.strip_name_encoding) (fname);
7331   tname = (*targetm.strip_name_encoding) (tname);
7333   /* Output the thunk.  We know that the function is in the same
7334      translation unit (i.e., the same space) as the thunk, and that
7335      thunks are output after their method.  Thus, we don't need an
7336      external branch to reach the function.  With SOM and GAS,
7337      functions and thunks are effectively in different sections.
7338      Thus, we can always use a IA-relative branch and the linker
7339      will add a long branch stub if necessary.
7341      However, we have to be careful when generating PIC code on the
7342      SOM port to ensure that the sequence does not transfer to an
7343      import stub for the target function as this could clobber the
7344      return value saved at SP-24.  This would also apply to the
7345      32-bit linux port if the multi-space model is implemented. */
7346   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7347        && !(flag_pic && TREE_PUBLIC (function))
7348        && (TARGET_GAS || last_address < 262132))
7349       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7350 	  && ((targetm.have_named_sections
7351 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
7352 	       /* The GNU 64-bit linker has rather poor stub management.
7353 		  So, we use a long branch from thunks that aren't in
7354 		  the same section as the target function.  */
7356 		    && (DECL_SECTION_NAME (thunk_fndecl)
7357 			!= DECL_SECTION_NAME (function)))
7358 		   || ((DECL_SECTION_NAME (thunk_fndecl)
7359 			== DECL_SECTION_NAME (function))
7360 		       && last_address < 262132)))
7361 	      || (!targetm.have_named_sections && last_address < 262132))))
/* Short-reach case: direct branch, delta adjustment in the delay slot
   when it fits in 14 bits.  */
7365 	fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7366 		 "(%%r26),%%r26\n", fname, delta);
7371 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7373 	  fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7374 		   "(%%r1),%%r26\n", fname, delta);
7378   else if (TARGET_64BIT)
7380       /* We only have one call-clobbered scratch register, so we can't
7381 	 make use of the delay slot if delta doesn't fit in 14 bits.  */
7383 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7384 		   ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7385 		   "(%%r1),%%r26\n", delta, delta);
7387       fprintf (file, "\tb,l .+8,%%r1\n");
7391 	  fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7392 	  fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7396 	  int off = val_14 ? 8 : 16;
7397 	  fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7398 	  fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7403 	  fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7404 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7409 	  fprintf (file, "\tbv,n %%r0(%%r1)\n");
7413   else if (TARGET_PORTABLE_RUNTIME)
7415       fprintf (file, "\tldil L'%s,%%r1\n", fname);
7416       fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7420 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7421 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7426 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7428 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7429 	  fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7433   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7435       /* The function is accessible from outside this module.  The only
7436 	 way to avoid an import stub between the thunk and function is to
7437 	 call the function directly with an indirect sequence similar to
7438 	 that used by $$dyncall.  This is possible because $$dyncall acts
7439 	 as the import stub in an indirect call.  */
7442       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7443       lab = (*targetm.strip_name_encoding) (label);
/* Load the function pointer from the DLT, then follow the plabel bit-30
   protocol (see output_call) to reach the real entry point.  */
7445       fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7446       fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7447       fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7448       fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7449       fprintf (file, "\tdepi 0,31,2,%%r22\n");
7450       fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7451       fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7454 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7460 	  fprintf (file, "\tbve (%%r22)\n\tldo ");
7465 	  if (TARGET_NO_SPACE_REGS)
7467 	      fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7472 	      fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7473 	      fprintf (file, "\tmtsp %%r21,%%sr0\n");
7474 	      fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7480 	fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7482 	fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7487 	fprintf (file, "\tb,l .+8,%%r1\n");
7489 	fprintf (file, "\tbl .+8,%%r1\n");
7491       if (TARGET_SOM || !TARGET_GAS)
7493 	  fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7494 	  fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7498 	  fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7499 	  fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7504 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7505 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7510 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7512 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7513 	  fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7520 	fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7522       fprintf (file, "\tldil L'%s,%%r22\n", fname);
7523       fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7527 	fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7532 	fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7537   fprintf (file, "\t.EXIT\n\t.PROCEND\n");
/* SOM PIC thunks need a data word holding a plabel for the target.  */
7539   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7542       fprintf (file, "\t.align 4\n");
7543       ASM_OUTPUT_LABEL (file, label);
7544       fprintf (file, "\t.word P'%s\n", fname);
7545       function_section (thunk_fndecl);
7548   current_thunk_number++;
/* Round thunk size up to the function alignment boundary and account for
   it in the running code-size totals used by the length heuristics.  */
7549   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7550 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7551   last_address += nbytes;
7552   update_total_code_bytes (nbytes);
7555 /* Only direct calls to static functions are allowed to be sibling (tail)
7558    This restriction is necessary because some linker generated stubs will
7559    store return pointers into rp' in some cases which might clobber a
7560    live value already in rp'.
7562    In a sibcall the current function and the target function share stack
7563    space.  Thus if the path to the current function and the path to the
7564    target function save a value in rp', they save the value into the
7565    same stack slot, which has undesirable consequences.
7567    Because of the deferred binding nature of shared libraries any function
7568    with external scope could be in a different load module and thus require
7569    rp' to be saved when calling that function.  So sibcall optimizations
7570    can only be safe for static functions.
7572    Note that GCC never needs return value relocations, so we don't have to
7573    worry about static calls with return value relocations (which require
7576    It is safe to perform a sibcall optimization when the target function
7577    will never return.  */
/* Return nonzero if a sibling (tail) call to DECL is safe.  EXP is the
   call expression and is unused here.  NOTE(review): interior lines of
   this definition (return type, conditional guards) are elided in this
   excerpt — confirm control flow against the full file.  */
7579 pa_function_ok_for_sibcall (decl, exp)
7581      tree exp ATTRIBUTE_UNUSED;
7583   /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7584      single subspace mode and the call is not indirect.  As far as I know,
7585      there is no operating system support for the multiple subspace mode.
7586      It might be possible to support indirect calls if we didn't use
7587      $$dyncall (see the indirect sequence generated in output_call).  */
  /* A direct call has a non-NULL DECL; that is all the ELF32 path needs.  */
7589   return (decl != NULL_TREE);
7591   /* Sibcalls are not ok because the arg pointer register is not a fixed
7592      register.  This prevents the sibcall optimization from occurring.  In
7593      addition, there are problems with stub placement using GNU ld.  This
7594      is because a normal sibcall branch uses a 17-bit relocation while
7595      a regular call branch uses a 22-bit relocation.  As a result, more
7596      care needs to be taken in the placement of long-branch stubs.  */
7601 	  && !TARGET_PORTABLE_RUNTIME
7602 	  && !TREE_PUBLIC (decl));
7605 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7606    use in fmpyadd instructions.  */
/* Operand layout (inferred from the conflict checks below): operands
   0..2 form the fmpy (dest, src1, src2) and operands 3..5 form the add,
   with operand 3 the read/write destination of the add.  NOTE(review):
   the "return 0" lines for the failing checks and the final "return 1"
   are elided in this excerpt.  */
7608 fmpyaddoperands (operands)
7611   enum machine_mode mode = GET_MODE (operands[0]);
7613   /* Must be a floating point mode.  */
7614   if (mode != SFmode && mode != DFmode)
7617   /* All modes must be the same.  */
7618   if (! (mode == GET_MODE (operands[1])
7619 	 && mode == GET_MODE (operands[2])
7620 	 && mode == GET_MODE (operands[3])
7621 	 && mode == GET_MODE (operands[4])
7622 	 && mode == GET_MODE (operands[5])))
7625   /* All operands must be registers.  */
7626   if (! (GET_CODE (operands[1]) == REG
7627 	 && GET_CODE (operands[2]) == REG
7628 	 && GET_CODE (operands[3]) == REG
7629 	 && GET_CODE (operands[4]) == REG
7630 	 && GET_CODE (operands[5]) == REG))
7633   /* Only 2 real operands to the addition.  One of the input operands must
7634      be the same as the output operand.  */
7635   if (! rtx_equal_p (operands[3], operands[4])
7636       && ! rtx_equal_p (operands[3], operands[5]))
7639   /* Inout operand of add can not conflict with any operands from multiply.  */
7640   if (rtx_equal_p (operands[3], operands[0])
7641       || rtx_equal_p (operands[3], operands[1])
7642       || rtx_equal_p (operands[3], operands[2]))
7645   /* multiply can not feed into addition operands.  */
7646   if (rtx_equal_p (operands[4], operands[0])
7647       || rtx_equal_p (operands[5], operands[0]))
7650   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
7652       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7653 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7654 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7655 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7656 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7657 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7660   /* Passed.  Operands are suitable for fmpyadd.  */
7664 #if !defined(USE_COLLECT2)
/* Emit a constructor-table entry for SYMBOL with the given PRIORITY.
   The symbol is first given function-label encoding so PA calling
   conventions are honored, then the generic ctor-section or
   named-section/stabs fallback is used depending on target support.  */
7666 pa_asm_out_constructor (symbol, priority)
7670   if (!function_label_operand (symbol, VOIDmode))
7671     hppa_encode_label (symbol);
7673 #ifdef CTORS_SECTION_ASM_OP
7674   default_ctor_section_asm_out_constructor (symbol, priority);
7676 # ifdef TARGET_ASM_NAMED_SECTION
7677   default_named_section_asm_out_constructor (symbol, priority);
7679   default_stabs_asm_out_constructor (symbol, priority);
/* Emit a destructor-table entry for SYMBOL with the given PRIORITY.
   Mirrors pa_asm_out_constructor: encode the symbol as a function
   label, then dispatch to the dtor-section, named-section or stabs
   default depending on what the target assembler supports.  */
7685 pa_asm_out_destructor (symbol, priority)
7689   if (!function_label_operand (symbol, VOIDmode))
7690     hppa_encode_label (symbol);
7692 #ifdef DTORS_SECTION_ASM_OP
7693   default_dtor_section_asm_out_destructor (symbol, priority);
7695 # ifdef TARGET_ASM_NAMED_SECTION
7696   default_named_section_asm_out_destructor (symbol, priority);
7698   default_stabs_asm_out_destructor (symbol, priority);
7704 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7705    use in fmpysub instructions.  */
/* Same layout as fmpyaddoperands (0..2 = fmpy, 3..5 = fsub), except
   that subtraction is not commutative, so operand 4 must equal the
   read/write operand 3.  NOTE(review): failing "return 0" lines and
   the final "return 1" are elided in this excerpt.  */
7707 fmpysuboperands (operands)
7710   enum machine_mode mode = GET_MODE (operands[0]);
7712   /* Must be a floating point mode.  */
7713   if (mode != SFmode && mode != DFmode)
7716   /* All modes must be the same.  */
7717   if (! (mode == GET_MODE (operands[1])
7718 	 && mode == GET_MODE (operands[2])
7719 	 && mode == GET_MODE (operands[3])
7720 	 && mode == GET_MODE (operands[4])
7721 	 && mode == GET_MODE (operands[5])))
7724   /* All operands must be registers.  */
7725   if (! (GET_CODE (operands[1]) == REG
7726 	 && GET_CODE (operands[2]) == REG
7727 	 && GET_CODE (operands[3]) == REG
7728 	 && GET_CODE (operands[4]) == REG
7729 	 && GET_CODE (operands[5]) == REG))
7732   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
7733      operation, so operands[4] must be the same as operand[3].  */
7734   if (! rtx_equal_p (operands[3], operands[4]))
7737   /* multiply can not feed into subtraction.  */
7738   if (rtx_equal_p (operands[5], operands[0]))
7741   /* Inout operand of sub can not conflict with any operands from multiply.  */
7742   if (rtx_equal_p (operands[3], operands[0])
7743       || rtx_equal_p (operands[3], operands[1])
7744       || rtx_equal_p (operands[3], operands[2]))
7747   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
7749       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7750 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7751 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7752 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7753 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7754 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7757   /* Passed.  Operands are suitable for fmpysub.  */
/* Predicate: return nonzero when OP is a PLUS, XOR or IOR rtx.
   MODE is ignored.  */
7762 plus_xor_ior_operator (op, mode)
7764      enum machine_mode mode ATTRIBUTE_UNUSED;
7766   return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
7767 	  || GET_CODE (op) == IOR);
7770 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
7771    constants for shadd instructions.  */
/* shadd scales its index by 2, 4 or 8, i.e. a shift of 1, 2 or 3.  */
7773 shadd_constant_p (val)
7776   if (val == 2 || val == 4 || val == 8)
7782 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8.  These are
7783    the valid constant for shadd instructions.  */
/* Thin predicate wrapper around shadd_constant_p; MODE is ignored.  */
7785 shadd_operand (op, mode)
7787      enum machine_mode mode ATTRIBUTE_UNUSED;
7789   return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
7792 /* Return 1 if OP is valid as a base register in a reg + reg address.  */
/* NOTE(review): the "return 0" bodies of the early-out conditionals are
   elided in this excerpt.  */
7795 basereg_operand (op, mode)
7797      enum machine_mode mode;
7799   /* cse will create some unscaled indexed addresses, however; it
7800      generally isn't a win on the PA, so avoid creating unscaled
7801      indexed addresses until after cse is finished.  */
7802   if (!cse_not_expected)
7805   /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7806      we don't have to worry about the braindamaged implicit space
7807      register selection from the basereg.  */
7808   if (TARGET_NO_SPACE_REGS)
7809     return (GET_CODE (op) == REG);
7811   /* While it's always safe to index off the frame pointer, it's not
7812      always profitable, particularly when the frame pointer is being
7814   if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
  /* Otherwise require a genuine register operand.  */
7817   return (GET_CODE (op) == REG
7819 	  && register_operand (op, mode));
7822 /* Return 1 if this operand is anything other than a hard register.  */
/* True for pseudos, constants, MEMs, etc.; MODE is ignored.  */
7825 non_hard_reg_operand (op, mode)
7827      enum machine_mode mode ATTRIBUTE_UNUSED;
7829   return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7832 /* Return 1 if INSN branches forward.  Should be using insn_addresses
7833    to avoid walking through all the insns...  */
/* Walk forward from INSN via NEXT_INSN; INSN branches forward exactly
   when its JUMP_LABEL is reached before the end of the insn chain.
   NOTE(review): the loop header is elided in this excerpt.  */
7835 forward_branch_p (insn)
7838   rtx label = JUMP_LABEL (insn);
7845       insn = NEXT_INSN (insn);
7848   return (insn == label);
7851 /* Return 1 if OP is an equality comparison, else return 0.  */
/* Matches only EQ and NE; MODE is ignored.  */
7853 eq_neq_comparison_operator (op, mode)
7855      enum machine_mode mode ATTRIBUTE_UNUSED;
7857   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7860 /* Return 1 if OP is an operator suitable for use in a movb instruction.  */
/* movb supports the EQ/NE/LT/GE conditions; MODE is ignored.  */
7862 movb_comparison_operator (op, mode)
7864      enum machine_mode mode ATTRIBUTE_UNUSED;
7866   return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7867 	  || GET_CODE (op) == LT || GET_CODE (op) == GE);
7870 /* Return 1 if INSN is in the delay slot of a call instruction.  */
/* A filled delay slot appears as element 1 of a SEQUENCE pattern; check
   whether the real insn two before INSN is such a SEQUENCE containing
   INSN.  NOTE(review): the trailing "return 0" path is elided in this
   excerpt.  */
7872 jump_in_call_delay (insn)
7876   if (GET_CODE (insn) != JUMP_INSN)
7879   if (PREV_INSN (insn)
7880       && PREV_INSN (PREV_INSN (insn))
7881       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
7883       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
7885       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
7886 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
7893 /* Output an unconditional move and branch insn.  */
/* OPERANDS: %0 = destination reg, %1 = source (reg or immediate),
   %2 = branch target.  The chosen template depends on the branch
   LENGTH and on whether the delay slot is already filled.
   NOTE(review): the length-test conditions are elided in this
   excerpt — presumably short lengths take the movb form.  */
7896 output_parallel_movb (operands, length)
7900   /* These are the cases in which we win.  */
7902     return "mov%I1b,tr %1,%0,%2";
7904   /* None of these cases wins, but they don't lose either.  */
7905   if (dbr_sequence_length () == 0)
7907       /* Nothing in the delay slot, fake it by putting the combined
7908 	 insn (the copy or add) in the delay slot of a bl.  */
7909       if (GET_CODE (operands[1]) == CONST_INT)
7910 	return "b %2\n\tldi %1,%0";
7912 	return "b %2\n\tcopy %1,%0";
7916       /* Something in the delay slot, but we've got a long branch.  */
7917       if (GET_CODE (operands[1]) == CONST_INT)
7918 	return "ldi %1,%0\n\tb %2";
7920 	return "copy %1,%0\n\tb %2";
7924 /* Output an unconditional add and branch insn.  */
/* OPERANDS: %0 = shared input/output reg, %1 = read-only addend (reg or
   immediate), %3 = branch target.  Operands 0/1 are canonicalized below
   so that %0 is always the inout operand.  NOTE(review): the LENGTH
   tests selecting between templates are elided in this excerpt.  */
7927 output_parallel_addb (operands, length)
7931   /* To make life easy we want operand0 to be the shared input/output
7932      operand and operand1 to be the readonly operand.  */
7933   if (operands[0] == operands[1])
7934     operands[1] = operands[2];
7936   /* These are the cases in which we win.  */
7938     return "add%I1b,tr %1,%0,%3";
7940   /* None of these cases win, but they don't lose either.  */
7941   if (dbr_sequence_length () == 0)
7943       /* Nothing in the delay slot, fake it by putting the combined
7944 	 insn (the copy or add) in the delay slot of a bl.  */
7945       return "b %3\n\tadd%I1 %1,%0,%0";
7949       /* Something in the delay slot, but we've got a long branch.  */
7950       return "add%I1 %1,%0,%0\n\tb %3";
7954 /* Return nonzero if INSN (a jump insn) immediately follows a call
7955    to a named function.  This is used to avoid filling the delay slot
7956    of the jump since it can usually be eliminated by modifying RP in
7957    the delay slot of the call.  */
/* Millicode calls are plain INSNs with type attribute TYPE_MILLI, so
   they are checked separately from CALL_INSNs.  NOTE(review): the
   early "return 0" and the surrounding return expression lines are
   partially elided in this excerpt.  */
7960 following_call (insn)
7963   if (! TARGET_JUMP_IN_DELAY)
7966   /* Find the previous real insn, skipping NOTEs.  */
7967   insn = PREV_INSN (insn);
7968   while (insn && GET_CODE (insn) == NOTE)
7969     insn = PREV_INSN (insn);
7971   /* Check for CALL_INSNs and millicode calls.  */
7973       && ((GET_CODE (insn) == CALL_INSN
7974 	   && get_attr_type (insn) != TYPE_DYNCALL)
7975 	  || (GET_CODE (insn) == INSN
7976 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
7977 	      && GET_CODE (PATTERN (insn)) != USE
7978 	      && GET_CODE (PATTERN (insn)) != CLOBBER
7979 	      && get_attr_type (insn) == TYPE_MILLI)))
7985 /* We use this hook to perform a PA specific optimization which is difficult
7986    to do in earlier passes.
7988    We want the delay slots of branches within jump tables to be filled.
7989    None of the compiler passes at the moment even has the notion that a
7990    PA jump table doesn't contain addresses, but instead contains actual
7993    Because we actually jump into the table, the addresses of each entry
7994    must stay constant in relation to the beginning of the table (which
7995    itself must stay constant relative to the instruction to jump into
7996    it).  I don't believe we can guarantee earlier passes of the compiler
7997    will adhere to those rules.
7999    So, late in the compilation process we find all the jump tables, and
8000    expand them into real code -- eg each entry in the jump table vector
8001    will get an appropriate label followed by a jump to the final target.
8003    Reorg and the final jump pass can then optimize these branches and
8004    fill their delay slots.  We end up with smaller, more efficient code.
8006    The jump instructions within the table are special; we must be able
8007    to identify them during assembly output (if the jumps don't get filled
8008    we need to emit a nop rather than nullifying the delay slot)).  We
8009    identify jumps in switch tables by marking the SET with DImode.
8011    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8012    insns.  This serves two purposes, first it prevents jump.c from
8013    noticing that the last N entries in the table jump to the instruction
8014    immediately after the table and deleting the jumps.  Second, those
8015    insns mark where we should emit .begin_brtab and .end_brtab directives
8016    when using GAS (allows for better link time optimizations).  */
/* NOTE(review): the entry point's header line (presumably the machine
   dependent reorg hook, pa_reorg) is elided in this excerpt; the
   statements below are its body — confirm against the full file.  */
8023   remove_useless_addtr_insns (1);
  /* Combining is only profitable before PA8000-class cores.  */
8025   if (pa_cpu < PROCESSOR_8000)
8026     pa_combine_instructions ();
8029   /* This is fairly cheap, so always run it if optimizing.  */
8030   if (optimize > 0 && !TARGET_BIG_SWITCH)
8032       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
8033       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8035 	  rtx pattern, tmp, location;
8036 	  unsigned int length, i;
8038 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
8039 	  if (GET_CODE (insn) != JUMP_INSN
8040 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8041 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8044 	  /* Emit marker for the beginning of the branch table.  */
8045 	  emit_insn_before (gen_begin_brtab (), insn);
8047 	  pattern = PATTERN (insn);
8048 	  location = PREV_INSN (insn);
	  /* ADDR_DIFF_VEC holds its entries in element 1; ADDR_VEC in 0.  */
8049 	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8051 	  for (i = 0; i < length; i++)
8053 	      /* Emit a label before each jump to keep jump.c from
8054 		 removing this code.  */
8055 	      tmp = gen_label_rtx ();
8056 	      LABEL_NUSES (tmp) = 1;
8057 	      emit_label_after (tmp, location);
8058 	      location = NEXT_INSN (location);
8060 	      if (GET_CODE (pattern) == ADDR_VEC)
8062 		  /* Emit the jump itself.  */
8063 		  tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
8064 		  tmp = emit_jump_insn_after (tmp, location);
8065 		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
8066 		  /* It is easy to rely on the branch table markers
8067 		     during assembly output to trigger the correct code
8068 		     for a switch table jump with an unfilled delay slot,
8070 		     However, that requires state and assumes that we look
8073 		     We can't make such assumptions when computing the length
8074 		     of instructions.  Ugh.  We could walk the insn chain to
8075 		     determine if this instruction is in a branch table, but
8076 		     that can get rather expensive, particularly during the
8077 		     branch shortening phase of the compiler.
8079 		     So instead we mark this jump as being special.  This is
8080 		     far from ideal and knows that no code after this will
8081 		     muck around with the mode of the JUMP_INSN itself.  */
8082 		  PUT_MODE (tmp, SImode);
8083 		  LABEL_NUSES (JUMP_LABEL (tmp))++;
8084 		  location = NEXT_INSN (location);
8088 		  /* Emit the jump itself.  */
8089 		  tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
8090 		  tmp = emit_jump_insn_after (tmp, location);
8091 		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
8092 		  /* It is easy to rely on the branch table markers
8093 		     during assembly output to trigger the correct code
8094 		     for a switch table jump with an unfilled delay slot,
8096 		     However, that requires state and assumes that we look
8099 		     We can't make such assumptions when computing the length
8100 		     of instructions.  Ugh.  We could walk the insn chain to
8101 		     determine if this instruction is in a branch table, but
8102 		     that can get rather expensive, particularly during the
8103 		     branch shortening phase of the compiler.
8105 		     So instead we mark this jump as being special.  This is
8106 		     far from ideal and knows that no code after this will
8107 		     muck around with the mode of the JUMP_INSN itself.  */
8108 		  PUT_MODE (tmp, SImode);
8109 		  LABEL_NUSES (JUMP_LABEL (tmp))++;
8110 		  location = NEXT_INSN (location);
8113 	      /* Emit a BARRIER after the jump.  */
8114 	      emit_barrier_after (location);
8115 	      location = NEXT_INSN (location);
8118 	  /* Emit marker for the end of the branch table.  */
8119 	  emit_insn_before (gen_end_brtab (), location);
8120 	  location = NEXT_INSN (location);
8121 	  emit_barrier_after (location);
8123 	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
8129       /* Still need an end_brtab insn.  */
8130       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8132 	  /* Find an ADDR_VEC insn.  */
8133 	  if (GET_CODE (insn) != JUMP_INSN
8134 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8135 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8138 	  /* Now generate markers for the beginning and end of the
8140 	  emit_insn_before (gen_begin_brtab (), insn);
8141 	  emit_insn_after (gen_end_brtab (), insn);
8146 /* The PA has a number of odd instructions which can perform multiple
8147    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8148    it may be profitable to combine two instructions into one instruction
8149    with two outputs.  It's not profitable on PA2.0 machines because the
8150    two outputs would take two slots in the reorder buffers.
8152    This routine finds instructions which can be combined and combines
8153    them.  We only support some of the potential combinations, and we
8154    only try common ways to find suitable instructions.
8156    * addb can add two registers or a register and a small integer
8157    and jump to a nearby (+-8k) location.  Normally the jump to the
8158    nearby location is conditional on the result of the add, but by
8159    using the "true" condition we can make the jump unconditional.
8160    Thus addb can perform two independent operations in one insn.
8162    * movb is similar to addb in that it can perform a reg->reg
8163    or small immediate->reg copy and jump to a nearby (+-8k location).
8165    * fmpyadd and fmpysub can perform a FP multiply and either an
8166    FP add or FP sub if the operands of the multiply and add/sub are
8167    independent (there are other minor restrictions).  Note both
8168    the fmpy and fadd/fsub can in theory move to better spots according
8169    to data dependencies, but for now we require the fmpy stay at a
8172    * Many of the memory operations can perform pre & post updates
8173    of index registers.  GCC's pre/post increment/decrement addressing
8174    is far too simple to take advantage of all the possibilities.  This
8175    pass may not be suitable since those insns may not be independent.
8177    * comclr can compare two ints or an int and a register, nullify
8178    the following instruction and zero some other register.  This
8179    is more difficult to use as it's harder to find an insn which
8180    will generate a comclr than finding something like an unconditional
8181    branch.  (conditional moves & long branches create comclr insns).
8183    * Most arithmetic operations can conditionally skip the next
8184    instruction.  They can be viewed as "perform this operation
8185    and conditionally jump to this nearby location" (where nearby
8186    is an insns away).  These are difficult to use due to the
8187    branch length restrictions.  */
/* NOTE(review): many interior lines of this function (braces, loop
   bounds, "continue"/"break" statements and the scratch-insn
   declarations) are elided in this excerpt — confirm structure
   against the full file.  */
8190 pa_combine_instructions ()
8194   /* This can get expensive since the basic algorithm is on the
8195      order of O(n^2) (or worse).  Only do it for -O2 or higher
8196      levels of optimization.  */
8200   /* Walk down the list of insns looking for "anchor" insns which
8201      may be combined with "floating" insns.  As the name implies,
8202      "anchor" instructions don't move, while "floating" insns may
  /* Reusable scratch PARALLEL insn handed to pa_can_combine_p.  */
8204   new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8205   new = make_insn_raw (new);
8207   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8209       enum attr_pa_combine_type anchor_attr;
8210       enum attr_pa_combine_type floater_attr;
8212       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8213 	 Also ignore any special USE insns.  */
8214       if ((GET_CODE (anchor) != INSN
8215 	   && GET_CODE (anchor) != JUMP_INSN
8216 	   && GET_CODE (anchor) != CALL_INSN)
8217 	  || GET_CODE (PATTERN (anchor)) == USE
8218 	  || GET_CODE (PATTERN (anchor)) == CLOBBER
8219 	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8220 	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8223       anchor_attr = get_attr_pa_combine_type (anchor);
8224       /* See if anchor is an insn suitable for combination.  */
8225       if (anchor_attr == PA_COMBINE_TYPE_FMPY
8226 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8227 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8228 	      && ! forward_branch_p (anchor)))
	  /* First scan backwards for a suitable floater.  */
8232 	  for (floater = PREV_INSN (anchor);
8234 	       floater = PREV_INSN (floater))
8236 	      if (GET_CODE (floater) == NOTE
8237 		  || (GET_CODE (floater) == INSN
8238 		      && (GET_CODE (PATTERN (floater)) == USE
8239 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
8242 	      /* Anything except a regular INSN will stop our search.  */
8243 	      if (GET_CODE (floater) != INSN
8244 		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
8245 		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8251 	      /* See if FLOATER is suitable for combination with the
8253 	      floater_attr = get_attr_pa_combine_type (floater);
8254 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8255 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8256 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8257 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
8259 		  /* If ANCHOR and FLOATER can be combined, then we're
8260 		     done with this pass.  */
8261 		  if (pa_can_combine_p (new, anchor, floater, 0,
8262 					SET_DEST (PATTERN (floater)),
8263 					XEXP (SET_SRC (PATTERN (floater)), 0),
8264 					XEXP (SET_SRC (PATTERN (floater)), 1)))
8268 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8269 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8271 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8273 		      if (pa_can_combine_p (new, anchor, floater, 0,
8274 					    SET_DEST (PATTERN (floater)),
8275 					    XEXP (SET_SRC (PATTERN (floater)), 0),
8276 					    XEXP (SET_SRC (PATTERN (floater)), 1)))
		      /* Plain move: source doubles as both "inputs".  */
8281 		      if (pa_can_combine_p (new, anchor, floater, 0,
8282 					    SET_DEST (PATTERN (floater)),
8283 					    SET_SRC (PATTERN (floater)),
8284 					    SET_SRC (PATTERN (floater))))
8290 	  /* If we didn't find anything on the backwards scan try forwards.  */
8292 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
8293 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8295 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
8297 		  if (GET_CODE (floater) == NOTE
8298 		      || (GET_CODE (floater) == INSN
8299 			  && (GET_CODE (PATTERN (floater)) == USE
8300 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
8304 		  /* Anything except a regular INSN will stop our search.  */
8305 		  if (GET_CODE (floater) != INSN
8306 		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
8307 		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8313 		  /* See if FLOATER is suitable for combination with the
8315 		  floater_attr = get_attr_pa_combine_type (floater);
8316 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8317 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8318 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8319 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
8321 		      /* If ANCHOR and FLOATER can be combined, then we're
8322 			 done with this pass.  */
8323 		      if (pa_can_combine_p (new, anchor, floater, 1,
8324 					    SET_DEST (PATTERN (floater)),
8325 					    XEXP (SET_SRC (PATTERN (floater)),
8327 					    XEXP (SET_SRC (PATTERN (floater)),
8334 	  /* FLOATER will be nonzero if we found a suitable floating
8335 	     insn for combination with ANCHOR.  */
8337 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8338 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
8340 	      /* Emit the new instruction and delete the old anchor.  */
8341 	      emit_insn_before (gen_rtx_PARALLEL
8343 				 gen_rtvec (2, PATTERN (anchor),
8344 					    PATTERN (floater))),
	      /* Turn the anchor into a deleted note in place.  */
8347 	      PUT_CODE (anchor, NOTE);
8348 	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8349 	      NOTE_SOURCE_FILE (anchor) = 0;
8351 	      /* Emit a special USE insn for FLOATER, then delete
8352 		 the floating insn.  */
8353 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8354 	      delete_insn (floater);
8359 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8362 	      /* Emit the new_jump instruction and delete the old anchor.  */
8364 		= emit_jump_insn_before (gen_rtx_PARALLEL
8366 					  gen_rtvec (2, PATTERN (anchor),
8367 						     PATTERN (floater))),
8370 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8371 	      PUT_CODE (anchor, NOTE);
8372 	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8373 	      NOTE_SOURCE_FILE (anchor) = 0;
8375 	      /* Emit a special USE insn for FLOATER, then delete
8376 		 the floating insn.  */
8377 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8378 	      delete_insn (floater);
/* Return nonzero if ANCHOR and FLOATER can be combined into a single
   two-output insn.  NEW is a scratch PARALLEL insn whose two elements
   are overwritten with the candidates' patterns for recognition.
   REVERSED indicates the scan direction (FLOATER after ANCHOR when
   nonzero).  DEST/SRC1/SRC2 are FLOATER's output and inputs, used for
   the liveness checks below.  NOTE(review): the "return 0" bodies,
   the start/end computation and the final "return 1" are elided in
   this excerpt.  */
8386 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
8387      rtx new, anchor, floater;
8389      rtx dest, src1, src2;
8391   int insn_code_number;
8394   /* Create a PARALLEL with the patterns of ANCHOR and
8395      FLOATER, try to recognize it, then test constraints
8396      for the resulting pattern.
8398      If the pattern doesn't match or the constraints
8399      aren't met keep searching for a suitable floater
8401   XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8402   XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8403   INSN_CODE (new) = -1;
8404   insn_code_number = recog_memoized (new);
8405   if (insn_code_number < 0
8406       || (extract_insn (new), ! constrain_operands (1)))
8420   /* There's up to three operands to consider.  One
8421      output and two inputs.
8423      The output must not be used between FLOATER & ANCHOR
8424      exclusive.  The inputs must not be set between
8425      FLOATER and ANCHOR exclusive.  */
8427   if (reg_used_between_p (dest, start, end))
8430   if (reg_set_between_p (src1, start, end))
8433   if (reg_set_between_p (src2, start, end))
8436   /* If we get here, then everything is good.  */
8440 /* Return nonzero if references for INSN are delayed.
8442    Millicode insns are actually function calls with some special
8443    constraints on arguments and register usage.
8445    Millicode calls always expect their arguments in the integer argument
8446    registers, and always return their result in %r29 (ret1).  They
8447    are expected to clobber their arguments, %r1, %r29, and the return
8448    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8450    This function tells reorg that the references to arguments and
8451    millicode calls do not appear to happen until after the millicode call.
8452    This allows reorg to put insns which set the argument registers into the
8453    delay slot of the millicode call -- thus they act more like traditional
8456    Note we can not consider side effects of the insn to be delayed because
8457    the branch and link insn will clobber the return pointer.  If we happened
8458    to use the return pointer in the delay slot of the call, then we lose.
8460    get_attr_type will try to recognize the given insn, so make sure to
8461    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8464 insn_refs_are_delayed (insn)
  /* True only for a plain INSN whose type attribute is TYPE_MILLI.  */
8467   return ((GET_CODE (insn) == INSN
8468 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
8469 	   && GET_CODE (PATTERN (insn)) != USE
8470 	   && GET_CODE (PATTERN (insn)) != CLOBBER
8471 	   && get_attr_type (insn) == TYPE_MILLI));
8474 /* On the HP-PA the value is found in register(s) 28(-29), unless
8475    the mode is SF or DF.  Then the value is returned in fr4 (32).
8477    This must perform the same promotions as PROMOTE_MODE, else
8478    PROMOTE_FUNCTION_RETURN will not work correctly.
8480    Small structures must be returned in a PARALLEL on PA64 in order
8481    to match the HP Compiler ABI.  */
/* Return an rtx describing where a value of type VALTYPE is returned.
   FUNC is unused.  NOTE(review): the loc[] declaration and parts of the
   EXPR_LIST construction are elided in this excerpt.  */
8484 function_value (valtype, func)
8486      tree func ATTRIBUTE_UNUSED;
8488   enum machine_mode valmode;
8490   /* Aggregates with a size less than or equal to 128 bits are returned
8491      in GR 28(-29).  They are left justified.  The pad bits are undefined.
8492      Larger aggregates are returned in memory.  */
8493   if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
      /* One register for up to a word, two for up to 128 bits.  */
8497       int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8499       for (i = 0; i < ub; i++)
8501 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8502 				      gen_rtx_REG (DImode, 28 + i),
8507       return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
  /* Promote small integers and pointers to word_mode, matching
     PROMOTE_MODE as the header comment requires.  */
8510   if ((INTEGRAL_TYPE_P (valtype)
8511        && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8512       || POINTER_TYPE_P (valtype))
8513     valmode = word_mode;
8515     valmode = TYPE_MODE (valtype);
  /* Non-TFmode floats come back in fr4 (hard reg 32) when FP is on.  */
8517   if (TREE_CODE (valtype) == REAL_TYPE
8518       && TYPE_MODE (valtype) != TFmode
8519       && !TARGET_SOFT_FLOAT)
8520     return gen_rtx_REG (valmode, 32);
8522   return gen_rtx_REG (valmode, 28);
8525 /* Return the location of a parameter that is passed in a register or NULL
8526 if the parameter has any component that is passed in memory.
8528 This is new code and will be pushed to into the net sources after
8531 ??? We might want to restructure this so that it looks more like other
/* NOTE(review): this extraction fused the original file's line numbers into
   the text and dropped intervening lines (local declarations, braces and
   return statements), so the code below is not compilable as-is.  It is
   kept byte-for-byte; only explanatory comments are added.  */
8534 function_arg (cum, mode, type, named)
8535 CUMULATIVE_ARGS *cum;
8536 enum machine_mode mode;
8538 int named ATTRIBUTE_UNUSED;
/* Number of argument register slots: eight 64-bit slots for the 64-bit
   ABI, four 32-bit words for the 32-bit ABI.  */
8540 int max_arg_words = (TARGET_64BIT ? 8 : 4);
/* NOTE(review): the elided branch after this test presumably returns no
   register location for VOIDmode — confirm against the full source.  */
8547 if (mode == VOIDmode)
8550 arg_size = FUNCTION_ARG_SIZE (mode, type);
8552 /* If this arg would be passed partially or totally on the stack, then
8553 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8554 handle arguments which are split between regs and stack slots if
8555 the ABI mandates split arguments. */
8558 /* The 32-bit ABI does not split arguments. */
8559 if (cum->words + arg_size > max_arg_words)
/* 64-bit path: an odd cum->words means one slot of padding is needed so a
   multi-word argument starts on an even (128-bit aligned) slot.
   NOTE(review): the enclosing 32-bit/64-bit branch structure was elided.  */
8565 alignment = cum->words & 1;
8566 if (cum->words + alignment >= max_arg_words)
8570 /* The 32bit ABIs and the 64bit ABIs are rather different,
8571 particularly in their handling of FP registers. We might
8572 be able to cleverly share code between them, but I'm not
8573 going to bother in the hope that splitting them up results
8574 in code that is more easily understood. */
8578 /* Advance the base registers to their current locations.
8580 Remember, gprs grow towards smaller register numbers while
8581 fprs grow to higher register numbers. Also remember that
8582 although FP regs are 32-bit addressable, we pretend that
8583 the registers are 64-bits wide. */
8584 gpr_reg_base = 26 - cum->words;
8585 fpr_reg_base = 32 + cum->words;
8587 /* Arguments wider than one word and small aggregates need special
8591 || (type && AGGREGATE_TYPE_P (type)))
8593 /* Double-extended precision (80-bit), quad-precision (128-bit)
8594 and aggregates including complex numbers are aligned on
8595 128-bit boundaries. The first eight 64-bit argument slots
8596 are associated one-to-one, with general registers r26
8597 through r19, and also with floating-point registers fr4
8598 through fr11. Arguments larger than one word are always
8599 passed in general registers.
8601 Using a PARALLEL with a word mode register results in left
8602 justified data on a big-endian target. */
8605 int i, offset = 0, ub = arg_size;
8607 /* Align the base register. */
8608 gpr_reg_base -= alignment;
/* Only as many words as actually fit in registers go into the PARALLEL.  */
8610 ub = MIN (ub, max_arg_words - cum->words - alignment);
8611 for (i = 0; i < ub; i++)
8613 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8614 gen_rtx_REG (DImode, gpr_reg_base),
8620 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8625 /* If the argument is larger than a word, then we know precisely
8626 which registers we must use. */
8640 /* Structures 5 to 8 bytes in size are passed in the general
8641 registers in the same manner as other non floating-point
8642 objects. The data is right-justified and zero-extended
8645 This is magic. Normally, using a PARALLEL results in left
8646 justified data on a big-endian target. However, using a
8647 single double-word register provides the required right
8648 justification for 5 to 8 byte structures. This has nothing
8649 to do with the direction of padding specified for the argument.
8650 It has to do with how the data is widened and shifted into
8651 and from the register.
8653 Aside from adding load_multiple and store_multiple patterns,
8654 this is the only way that I have found to obtain right
8655 justification of BLKmode data when it has a size greater
8656 than one word. Splitting the operation into two SImode loads
8657 or returning a DImode REG results in left justified data. */
8658 if (mode == BLKmode)
8660 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8661 gen_rtx_REG (DImode, gpr_reg_base),
8663 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8668 /* We have a single word (32 bits). A simple computation
8669 will get us the register #s we need. */
8670 gpr_reg_base = 26 - cum->words;
/* FP regs advance by two per argument word here — presumably because the
   32-bit ABI addresses the 32-bit halves individually (see the note above
   about pretending FP regs are 64 bits wide).  TODO confirm.  */
8671 fpr_reg_base = 32 + 2 * cum->words;
8675 /* Determine if the argument needs to be passed in both general and
8676 floating point registers. */
8677 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8678 /* If we are doing soft-float with portable runtime, then there
8679 is no need to worry about FP regs. */
8680 && !TARGET_SOFT_FLOAT
8681 /* The parameter must be some kind of float, else we can just
8682 pass it in integer registers. */
8683 && FLOAT_MODE_P (mode)
8684 /* The target function must not have a prototype. */
8685 && cum->nargs_prototype <= 0
8686 /* libcalls do not need to pass items in both FP and general
8688 && type != NULL_TREE
8689 /* All this hair applies to "outgoing" args only. This includes
8690 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8692 /* Also pass outgoing floating arguments in both registers in indirect
8693 calls with the 32 bit ABI and the HP assembler since there is no
8694 way to the specify argument locations in static functions. */
8699 && FLOAT_MODE_P (mode)))
/* Both-register case: a PARALLEL naming the FP copy first, then the
   general-register copy (elided rtvec construction lines surround this).  */
8705 gen_rtx_EXPR_LIST (VOIDmode,
8706 gen_rtx_REG (mode, fpr_reg_base),
8708 gen_rtx_EXPR_LIST (VOIDmode,
8709 gen_rtx_REG (mode, gpr_reg_base),
8714 /* See if we should pass this parameter in a general register. */
8715 if (TARGET_SOFT_FLOAT
8716 /* Indirect calls in the normal 32bit ABI require all arguments
8717 to be passed in general registers. */
8718 || (!TARGET_PORTABLE_RUNTIME
8722 /* If the parameter is not a floating point parameter, then
8723 it belongs in GPRs. */
8724 || !FLOAT_MODE_P (mode))
8725 retval = gen_rtx_REG (mode, gpr_reg_base);
8727 retval = gen_rtx_REG (mode, fpr_reg_base);
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  It is currently called only for
   the 64-bit target.  */
8737 function_arg_partial_nregs (cum, mode, type, named)
8738 CUMULATIVE_ARGS *cum;
8739 enum machine_mode mode;
8741 int named ATTRIBUTE_UNUSED;
8743 unsigned int max_arg_words = 8;
8744 unsigned int offset = 0;
8746 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
8749 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
8750 /* Arg fits fully into registers. */
8752 else if (cum->words + offset >= max_arg_words)
8753 /* Arg fully on the stack. */
8757 return max_arg_words - cum->words - offset;
/* Return 1 if this is a comparison operator.  This allows the use of
   MATCH_OPERATOR to recognize all the branch insns.  */
8765 cmpib_comparison_operator (op, mode)
8767 enum machine_mode mode;
8769 return ((mode == VOIDmode || GET_MODE (op) == mode)
8770 && (GET_CODE (op) == EQ
8771 || GET_CODE (op) == NE
8772 || GET_CODE (op) == GT
8773 || GET_CODE (op) == GTU
8774 || GET_CODE (op) == GE
8775 || GET_CODE (op) == LT
8776 || GET_CODE (op) == LE
8777 || GET_CODE (op) == LEU));
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc can
   not be placed in the read-only data section.  */
8786 pa_select_section (exp, reloc, align)
8789 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED;
8791 if (TREE_CODE (exp) == VAR_DECL
8792 && TREE_READONLY (exp)
8793 && !TREE_THIS_VOLATILE (exp)
8794 && DECL_INITIAL (exp)
8795 && (DECL_INITIAL (exp) == error_mark_node
8796 || TREE_CONSTANT (DECL_INITIAL (exp)))
8798 readonly_data_section ();
8799 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
8800 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
8802 readonly_data_section ();
8808 pa_globalize_label (stream, name)
8812 /* We only handle DATA objects here, functions are globalized in
8813 ASM_DECLARE_FUNCTION_NAME. */
8814 if (! FUNCTION_NAME_P (name))
8816 fputs ("\t.EXPORT ", stream);
8817 assemble_name (stream, name);
8818 fputs (",DATA\n", stream);