1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
50 #include "target-def.h"
static int hppa_use_dfa_pipeline_interface (void);

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface

/* Tell the scheduler to always use the DFA-based pipeline description
   rather than the old function-unit based one.  */
static int
hppa_use_dfa_pipeline_interface (void)
{
  return 1;
}
63 /* Return nonzero if there is a bypass for the output of
64 OUT_INSN and the fp store IN_INSN. */
66 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
68 enum machine_mode store_mode;
69 enum machine_mode other_mode;
72 if (recog_memoized (in_insn) < 0
73 || get_attr_type (in_insn) != TYPE_FPSTORE
74 || recog_memoized (out_insn) < 0)
77 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
79 set = single_set (out_insn);
83 other_mode = GET_MODE (SET_SRC (set));
85 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
89 #ifndef DO_FRAME_NOTES
90 #ifdef INCOMING_RETURN_ADDR_RTX
91 #define DO_FRAME_NOTES 1
93 #define DO_FRAME_NOTES 0
97 static int hppa_address_cost (rtx);
98 static bool hppa_rtx_costs (rtx, int, int, int *);
99 static inline rtx force_mode (enum machine_mode, rtx);
100 static void pa_reorg (void);
101 static void pa_combine_instructions (void);
102 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
103 static int forward_branch_p (rtx);
104 static int shadd_constant_p (int);
105 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
106 static int compute_movstr_length (rtx);
107 static int compute_clrstr_length (rtx);
108 static bool pa_assemble_integer (rtx, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, int, int);
111 static void store_reg_modify (int, int, int);
112 static void load_reg (int, int, int);
113 static void set_reg_plus_d (int, int, int, int);
114 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
115 static void update_total_code_bytes (int);
116 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static int pa_adjust_cost (rtx, rtx, rtx, int);
118 static int pa_adjust_priority (rtx, int);
119 static int pa_issue_rate (void);
120 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
122 static void pa_encode_section_info (tree, rtx, int);
123 static const char *pa_strip_name_encoding (const char *);
124 static bool pa_function_ok_for_sibcall (tree, tree);
125 static void pa_globalize_label (FILE *, const char *)
127 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree);
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor (rtx, int);
131 static void pa_asm_out_destructor (rtx, int);
133 static void pa_init_builtins (void);
134 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
135 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
136 static struct deferred_plabel *get_plabel (const char *)
138 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
139 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
140 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
141 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
142 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
144 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
145 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
147 static void output_deferred_plabels (void);
148 #ifdef HPUX_LONG_DOUBLE_LIBRARY
149 static void pa_hpux_init_libfuncs (void);
152 /* Save the operands last given to a compare for use when we
153 generate a scc or bcc insn. */
154 rtx hppa_compare_op0, hppa_compare_op1;
155 enum cmp_type hppa_branch_type;
157 /* Which cpu we are scheduling for. */
158 enum processor_type pa_cpu;
160 /* String to hold which cpu we are scheduling for. */
161 const char *pa_cpu_string;
163 /* Which architecture we are generating code for. */
164 enum architecture_type pa_arch;
166 /* String to hold which architecture we are generating code for. */
167 const char *pa_arch_string;
169 /* Counts for the number of callee-saved general and floating point
170 registers which were saved by the current function's prologue. */
171 static int gr_saved, fr_saved;
173 static rtx find_addr_reg (rtx);
175 /* Keep track of the number of bytes we have output in the CODE subspace
176 during this compilation so we'll know when to emit inline long-calls. */
177 unsigned long total_code_bytes;
179 /* The last address of the previous function plus the number of bytes in
180 associated thunks that have been output. This is used to determine if
181 a thunk can use an IA-relative branch to reach its target function. */
182 static int last_address;
184 /* Variables to handle plabels that we discover are necessary at assembly
185 output time. They are output after the current function. */
186 struct deferred_plabel GTY(())
191 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
193 static size_t n_deferred_plabels = 0;
196 /* Initialize the GCC target structure. */
198 #undef TARGET_ASM_ALIGNED_HI_OP
199 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
200 #undef TARGET_ASM_ALIGNED_SI_OP
201 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
202 #undef TARGET_ASM_ALIGNED_DI_OP
203 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
204 #undef TARGET_ASM_UNALIGNED_HI_OP
205 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
206 #undef TARGET_ASM_UNALIGNED_SI_OP
207 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
208 #undef TARGET_ASM_UNALIGNED_DI_OP
209 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER pa_assemble_integer
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
215 #undef TARGET_ASM_FUNCTION_EPILOGUE
216 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
218 #undef TARGET_SCHED_ADJUST_COST
219 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
220 #undef TARGET_SCHED_ADJUST_PRIORITY
221 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
222 #undef TARGET_SCHED_ISSUE_RATE
223 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
225 #undef TARGET_ENCODE_SECTION_INFO
226 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
227 #undef TARGET_STRIP_NAME_ENCODING
228 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
230 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
231 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
233 #undef TARGET_ASM_OUTPUT_MI_THUNK
234 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
235 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
236 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
238 #undef TARGET_ASM_FILE_END
239 #define TARGET_ASM_FILE_END output_deferred_plabels
241 #if !defined(USE_COLLECT2)
242 #undef TARGET_ASM_CONSTRUCTOR
243 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
244 #undef TARGET_ASM_DESTRUCTOR
245 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
248 #undef TARGET_INIT_BUILTINS
249 #define TARGET_INIT_BUILTINS pa_init_builtins
251 #undef TARGET_RTX_COSTS
252 #define TARGET_RTX_COSTS hppa_rtx_costs
253 #undef TARGET_ADDRESS_COST
254 #define TARGET_ADDRESS_COST hppa_address_cost
256 #undef TARGET_MACHINE_DEPENDENT_REORG
257 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
259 #ifdef HPUX_LONG_DOUBLE_LIBRARY
260 #undef TARGET_INIT_LIBFUNCS
261 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
264 struct gcc_target targetm = TARGET_INITIALIZER;
267 override_options (void)
269 if (pa_cpu_string == NULL)
270 pa_cpu_string = TARGET_SCHED_DEFAULT;
272 if (! strcmp (pa_cpu_string, "8000"))
274 pa_cpu_string = "8000";
275 pa_cpu = PROCESSOR_8000;
277 else if (! strcmp (pa_cpu_string, "7100"))
279 pa_cpu_string = "7100";
280 pa_cpu = PROCESSOR_7100;
282 else if (! strcmp (pa_cpu_string, "700"))
284 pa_cpu_string = "700";
285 pa_cpu = PROCESSOR_700;
287 else if (! strcmp (pa_cpu_string, "7100LC"))
289 pa_cpu_string = "7100LC";
290 pa_cpu = PROCESSOR_7100LC;
292 else if (! strcmp (pa_cpu_string, "7200"))
294 pa_cpu_string = "7200";
295 pa_cpu = PROCESSOR_7200;
297 else if (! strcmp (pa_cpu_string, "7300"))
299 pa_cpu_string = "7300";
300 pa_cpu = PROCESSOR_7300;
304 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
307 /* Set the instruction set architecture. */
308 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
310 pa_arch_string = "1.0";
311 pa_arch = ARCHITECTURE_10;
312 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
314 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
316 pa_arch_string = "1.1";
317 pa_arch = ARCHITECTURE_11;
318 target_flags &= ~MASK_PA_20;
319 target_flags |= MASK_PA_11;
321 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
323 pa_arch_string = "2.0";
324 pa_arch = ARCHITECTURE_20;
325 target_flags |= MASK_PA_11 | MASK_PA_20;
327 else if (pa_arch_string)
329 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
332 /* Unconditional branches in the delay slot are not compatible with dwarf2
333 call frame information. There is no benefit in using this optimization
334 on PA8000 and later processors. */
335 if (pa_cpu >= PROCESSOR_8000
336 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
337 || flag_unwind_tables)
338 target_flags &= ~MASK_JUMP_IN_DELAY;
340 if (flag_pic && TARGET_PORTABLE_RUNTIME)
342 warning ("PIC code generation is not supported in the portable runtime model\n");
345 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
347 warning ("PIC code generation is not compatible with fast indirect calls\n");
350 if (! TARGET_GAS && write_symbols != NO_DEBUG)
352 warning ("-g is only supported when using GAS on this processor,");
353 warning ("-g option disabled");
354 write_symbols = NO_DEBUG;
357 /* We only support the "big PIC" model now. And we always generate PIC
358 code when in 64bit mode. */
359 if (flag_pic == 1 || TARGET_64BIT)
362 /* We can't guarantee that .dword is available for 32-bit targets. */
363 if (UNITS_PER_WORD == 4)
364 targetm.asm_out.aligned_op.di = NULL;
366 /* The unaligned ops are only available when using GAS. */
369 targetm.asm_out.unaligned_op.hi = NULL;
370 targetm.asm_out.unaligned_op.si = NULL;
371 targetm.asm_out.unaligned_op.di = NULL;
/* Target hook for TARGET_INIT_BUILTINS: on platforms without
   fputc_unlocked, remove the builtin so GCC never emits calls to it.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}
384 /* Return nonzero only if OP is a register of mode MODE,
387 reg_or_0_operand (rtx op, enum machine_mode mode)
389 return (op == CONST0_RTX (mode) || register_operand (op, mode));
392 /* Return nonzero if OP is suitable for use in a call to a named
395 For 2.5 try to eliminate either call_operand_address or
396 function_label_operand, they perform very similar functions. */
398 call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
400 return (GET_MODE (op) == word_mode
401 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
404 /* Return 1 if X contains a symbolic expression. We know these
405 expressions will have one of a few well defined forms, so
406 we need only check those forms. */
408 symbolic_expression_p (rtx x)
411 /* Strip off any HIGH. */
412 if (GET_CODE (x) == HIGH)
415 return (symbolic_operand (x, VOIDmode));
419 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
421 switch (GET_CODE (op))
428 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
429 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
430 && GET_CODE (XEXP (op, 1)) == CONST_INT);
436 /* Return truth value of statement that OP is a symbolic memory
437 operand of mode MODE. */
440 symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
442 if (GET_CODE (op) == SUBREG)
443 op = SUBREG_REG (op);
444 if (GET_CODE (op) != MEM)
447 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
448 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
451 /* Return 1 if the operand is either a register or a memory operand that is
455 reg_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
457 if (register_operand (op, mode))
460 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
466 /* Return 1 if the operand is either a register, zero, or a memory operand
467 that is not symbolic. */
470 reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
472 if (register_operand (op, mode))
475 if (op == CONST0_RTX (mode))
478 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
484 /* Return 1 if the operand is a register operand or a non-symbolic memory
485 operand after reload. This predicate is used for branch patterns that
486 internally handle register reloading. We need to accept non-symbolic
487 memory operands after reload to ensure that the pattern is still valid
488 if reload didn't find a hard register for the operand. */
491 reg_before_reload_operand (rtx op, enum machine_mode mode)
493 /* Don't accept a SUBREG since it will need a reload. */
494 if (GET_CODE (op) == SUBREG)
497 if (register_operand (op, mode))
501 && memory_operand (op, mode)
502 && ! symbolic_memory_operand (op, mode))
508 /* Accept any constant that can be moved in one instruction into a
511 cint_ok_for_move (HOST_WIDE_INT intval)
513 /* OK if ldo, ldil, or zdepi, can be used. */
514 return (CONST_OK_FOR_LETTER_P (intval, 'J')
515 || CONST_OK_FOR_LETTER_P (intval, 'N')
516 || CONST_OK_FOR_LETTER_P (intval, 'K'));
519 /* Accept anything that can be moved in one instruction into a general
522 move_operand (rtx op, enum machine_mode mode)
524 if (register_operand (op, mode))
527 if (GET_CODE (op) == CONSTANT_P_RTX)
530 if (GET_CODE (op) == CONST_INT)
531 return cint_ok_for_move (INTVAL (op));
533 if (GET_CODE (op) == SUBREG)
534 op = SUBREG_REG (op);
535 if (GET_CODE (op) != MEM)
540 /* We consider a LO_SUM DLT reference a move_operand now since it has
541 been merged into the normal movsi/movdi patterns. */
542 if (GET_CODE (op) == LO_SUM
543 && GET_CODE (XEXP (op, 0)) == REG
544 && REG_OK_FOR_BASE_P (XEXP (op, 0))
545 && GET_CODE (XEXP (op, 1)) == UNSPEC
546 && GET_MODE (op) == Pmode)
549 /* Since move_operand is only used for source operands, we can always
550 allow scaled indexing! */
551 if (! TARGET_DISABLE_INDEXING
552 && GET_CODE (op) == PLUS
553 && ((GET_CODE (XEXP (op, 0)) == MULT
554 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
555 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
556 && INTVAL (XEXP (XEXP (op, 0), 1))
557 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
558 && GET_CODE (XEXP (op, 1)) == REG)
559 || (GET_CODE (XEXP (op, 1)) == MULT
560 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
561 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
562 && INTVAL (XEXP (XEXP (op, 1), 1))
563 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
564 && GET_CODE (XEXP (op, 0)) == REG)))
567 return memory_address_p (mode, op);
570 /* Accept REG and any CONST_INT that can be moved in one instruction into a
573 reg_or_cint_move_operand (rtx op, enum machine_mode mode)
575 if (register_operand (op, mode))
578 if (GET_CODE (op) == CONST_INT)
579 return cint_ok_for_move (INTVAL (op));
585 pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
590 switch (GET_CODE (op))
596 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
597 && GET_CODE (XEXP (op, 1)) == CONST_INT);
604 fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
606 return reg_renumber && FP_REG_P (op);
611 /* Return truth value of whether OP can be used as an operand in a
612 three operand arithmetic insn that accepts registers of mode MODE
613 or 14-bit signed integers. */
615 arith_operand (rtx op, enum machine_mode mode)
617 return (register_operand (op, mode)
618 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
621 /* Return truth value of whether OP can be used as an operand in a
622 three operand arithmetic insn that accepts registers of mode MODE
623 or 11-bit signed integers. */
625 arith11_operand (rtx op, enum machine_mode mode)
627 return (register_operand (op, mode)
628 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
631 /* Return truth value of whether OP can be used as an operand in a
634 adddi3_operand (rtx op, enum machine_mode mode)
636 return (register_operand (op, mode)
637 || (GET_CODE (op) == CONST_INT
638 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
641 /* A constant integer suitable for use in a PRE_MODIFY memory
644 pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
646 return (GET_CODE (op) == CONST_INT
647 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
650 /* A constant integer suitable for use in a POST_MODIFY memory
653 post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
655 return (GET_CODE (op) == CONST_INT
656 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
660 arith_double_operand (rtx op, enum machine_mode mode)
662 return (register_operand (op, mode)
663 || (GET_CODE (op) == CONST_DOUBLE
664 && GET_MODE (op) == mode
665 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
666 && ((CONST_DOUBLE_HIGH (op) >= 0)
667 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
670 /* Return truth value of whether OP is an integer which fits the
671 range constraining immediate operands in three-address insns, or
672 is an integer register. */
675 ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
677 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
678 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
681 /* Return nonzero if OP is an integer register, else return zero. */
683 ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
685 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
688 /* Return truth value of whether OP is an integer which fits the
689 range constraining immediate operands in three-address insns. */
692 int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
694 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
698 uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
700 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
704 int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
706 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
710 uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
712 #if HOST_BITS_PER_WIDE_INT > 32
713 /* All allowed constants will fit a CONST_INT. */
714 return (GET_CODE (op) == CONST_INT
715 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
717 return (GET_CODE (op) == CONST_INT
718 || (GET_CODE (op) == CONST_DOUBLE
719 && CONST_DOUBLE_HIGH (op) == 0));
724 arith5_operand (rtx op, enum machine_mode mode)
726 return register_operand (op, mode) || int5_operand (op, mode);
729 /* True iff zdepi can be used to generate this CONST_INT.
730 zdepi first sign extends a 5 bit signed number to a given field
731 length, then places this field anywhere in a zero. */
733 zdepi_cint_p (unsigned HOST_WIDE_INT x)
735 unsigned HOST_WIDE_INT lsb_mask, t;
737 /* This might not be obvious, but it's at least fast.
738 This function is critical; we don't have the time loops would take. */
740 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
741 /* Return true iff t is a power of two. */
742 return ((t & (t - 1)) == 0);
745 /* True iff depi or extru can be used to compute (reg & mask).
746 Accept bit pattern like these:
751 and_mask_p (unsigned HOST_WIDE_INT mask)
754 mask += mask & -mask;
755 return (mask & (mask - 1)) == 0;
758 /* True iff depi or extru can be used to compute (reg & OP). */
760 and_operand (rtx op, enum machine_mode mode)
762 return (register_operand (op, mode)
763 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
766 /* True iff depi can be used to compute (reg | MASK). */
768 ior_mask_p (unsigned HOST_WIDE_INT mask)
770 mask += mask & -mask;
771 return (mask & (mask - 1)) == 0;
774 /* True iff depi can be used to compute (reg | OP). */
776 ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
778 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
782 lhs_lshift_operand (rtx op, enum machine_mode mode)
784 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
787 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
788 Such values can be the left hand side x in (x << r), using the zvdepi
791 lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
793 unsigned HOST_WIDE_INT x;
794 if (GET_CODE (op) != CONST_INT)
796 x = INTVAL (op) >> 4;
797 return (x & (x + 1)) == 0;
801 arith32_operand (rtx op, enum machine_mode mode)
803 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
807 pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
809 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
812 /* Legitimize PIC addresses.  If the address is already
813 position-independent, we return ORIG.  Newly generated
814 position-independent addresses go to REG.  If we need more
815 than one register, we lose. */
/* NOTE(review): this dump is missing many source lines (the embedded
   line numbers jump, e.g. 815->818, 857->861); what follows is an
   incomplete fragment of legitimize_pic_address.  Comments added only;
   no code tokens changed.  */
818 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
/* Case 1: PIC label references.  */
822 /* Labels need special handling. */
823 if (pic_label_operand (orig, mode))
825 /* We do not want to go through the movXX expanders here since that
826 would create recursion.
828 Nor do we really want to call a generator for a named pattern
829 since that requires multiple patterns if we want to support
832 So instead we just emit the raw set, which avoids the movXX
833 expanders completely. */
834 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
835 current_function_uses_pic_offset_table = 1;
/* Case 2: symbol references — form pic_offset_table + %hi(sym), then
   load the address through a %lo/LO_SUM memory reference (lines
   851-857 below).  */
838 if (GET_CODE (orig) == SYMBOL_REF)
845 /* Before reload, allocate a temporary register for the intermediate
846 result.  This allows the sequence to be deleted when the final
847 result is unused and the insns are trivially dead. */
848 tmp_reg = ((reload_in_progress || reload_completed)
849 ? reg : gen_reg_rtx (Pmode));
851 emit_move_insn (tmp_reg,
852 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
853 gen_rtx_HIGH (word_mode, orig)));
855 = gen_rtx_MEM (Pmode,
856 gen_rtx_LO_SUM (Pmode, tmp_reg,
857 gen_rtx_UNSPEC (Pmode,
861 current_function_uses_pic_offset_table = 1;
862 MEM_NOTRAP_P (pic_ref) = 1;
863 RTX_UNCHANGING_P (pic_ref) = 1;
864 insn = emit_move_insn (reg, pic_ref);
866 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
867 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));
/* Case 3: CONST expressions — recursively legitimize the two halves
   of an inner PLUS (lines 884-886), then recombine.  */
871 else if (GET_CODE (orig) == CONST)
875 if (GET_CODE (XEXP (orig, 0)) == PLUS
876 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
882 if (GET_CODE (XEXP (orig, 0)) == PLUS)
884 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
885 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
886 base == reg ? 0 : reg);
/* Small constant offsets are folded directly; larger ones are forced
   into a register before forming base + offset.  */
889 if (GET_CODE (orig) == CONST_INT)
891 if (INT_14_BITS (orig))
892 return plus_constant (base, INTVAL (orig));
893 orig = force_reg (Pmode, orig);
895 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
896 /* Likewise, should we set special REG_NOTEs here? */
901 /* Try machine-dependent ways of modifying an illegitimate address
902 to be legitimate. If we find one, return the new, valid address.
903 This macro is used in only one place: `memory_address' in explow.c.
905 OLDX is the address as it was before break_out_memory_refs was called.
906 In some cases it is useful to look at this to decide what needs to be done.
908 MODE and WIN are passed so that this macro can use
909 GO_IF_LEGITIMATE_ADDRESS.
911 It is always safe for this macro to do nothing. It exists to recognize
912 opportunities to optimize the output.
914 For the PA, transform:
916 memory(X + <large int>)
920 if (<large int> & mask) >= 16
921 Y = (<large int> & ~mask) + mask + 1 Round up.
923 Y = (<large int> & ~mask) Round down.
925 memory (Z + (<large int> - Y));
927 This is for CSE to find several similar references, and only use one Z.
929 X can either be a SYMBOL_REF or REG, but because combine can not
930 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
931 D will not fit in 14 bits.
933 MODE_FLOAT references allow displacements which fit in 5 bits, so use
936 MODE_INT references allow displacements which fit in 14 bits, so use
939 This relies on the fact that most mode MODE_FLOAT references will use FP
940 registers and most mode MODE_INT references will use integer registers.
941 (In the rare case of an FP register used in an integer MODE, we depend
942 on secondary reloads to clean things up.)
945 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
946 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
947 addressing modes to be used).
949 Put X and Z into registers. Then put the entire expression into
953 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
954 enum machine_mode mode)
959 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
961 /* Strip off CONST. */
962 if (GET_CODE (x) == CONST)
965 /* Special case. Get the SYMBOL_REF into a register and use indexing.
966 That should always be safe. */
967 if (GET_CODE (x) == PLUS
968 && GET_CODE (XEXP (x, 0)) == REG
969 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
971 rtx reg = force_reg (Pmode, XEXP (x, 1));
972 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
975 /* Note we must reject symbols which represent function addresses
976 since the assembler/linker can't handle arithmetic on plabels. */
977 if (GET_CODE (x) == PLUS
978 && GET_CODE (XEXP (x, 1)) == CONST_INT
979 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
980 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
981 || GET_CODE (XEXP (x, 0)) == REG))
983 rtx int_part, ptr_reg;
985 int offset = INTVAL (XEXP (x, 1));
988 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
989 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
991 /* Choose which way to round the offset. Round up if we
992 are >= halfway to the next boundary. */
993 if ((offset & mask) >= ((mask + 1) / 2))
994 newoffset = (offset & ~ mask) + mask + 1;
996 newoffset = (offset & ~ mask);
998 /* If the newoffset will not fit in 14 bits (ldo), then
999 handling this would take 4 or 5 instructions (2 to load
1000 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1001 add the new offset and the SYMBOL_REF.) Combine can
1002 not handle 4->2 or 5->2 combinations, so do not create
1004 if (! VAL_14_BITS_P (newoffset)
1005 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1007 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1010 gen_rtx_HIGH (Pmode, const_part));
1013 gen_rtx_LO_SUM (Pmode,
1014 tmp_reg, const_part));
1018 if (! VAL_14_BITS_P (newoffset))
1019 int_part = force_reg (Pmode, GEN_INT (newoffset));
1021 int_part = GEN_INT (newoffset);
1023 ptr_reg = force_reg (Pmode,
1024 gen_rtx_PLUS (Pmode,
1025 force_reg (Pmode, XEXP (x, 0)),
1028 return plus_constant (ptr_reg, offset - newoffset);
1031 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1033 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1034 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1035 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1036 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1037 || GET_CODE (XEXP (x, 1)) == SUBREG)
1038 && GET_CODE (XEXP (x, 1)) != CONST)
1040 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1044 if (GET_CODE (reg1) != REG)
1045 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1047 reg2 = XEXP (XEXP (x, 0), 0);
1048 if (GET_CODE (reg2) != REG)
1049 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1051 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1052 gen_rtx_MULT (Pmode,
1058 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1060 Only do so for floating point modes since this is more speculative
1061 and we lose if it's an integer store. */
1062 if (GET_CODE (x) == PLUS
1063 && GET_CODE (XEXP (x, 0)) == PLUS
1064 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1065 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1066 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1067 && (mode == SFmode || mode == DFmode))
1070 /* First, try and figure out what to use as a base register. */
1071 rtx reg1, reg2, base, idx, orig_base;
1073 reg1 = XEXP (XEXP (x, 0), 1);
1078 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1079 then emit_move_sequence will turn on REG_POINTER so we'll know
1080 it's a base register below. */
1081 if (GET_CODE (reg1) != REG)
1082 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1084 if (GET_CODE (reg2) != REG)
1085 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1087 /* Figure out what the base and index are. */
1089 if (GET_CODE (reg1) == REG
1090 && REG_POINTER (reg1))
1093 orig_base = XEXP (XEXP (x, 0), 1);
1094 idx = gen_rtx_PLUS (Pmode,
1095 gen_rtx_MULT (Pmode,
1096 XEXP (XEXP (XEXP (x, 0), 0), 0),
1097 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1100 else if (GET_CODE (reg2) == REG
1101 && REG_POINTER (reg2))
1104 orig_base = XEXP (x, 1);
1111 /* If the index adds a large constant, try to scale the
1112 constant so that it can be loaded with only one insn. */
1113 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1114 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1115 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1116 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1118 /* Divide the CONST_INT by the scale factor, then add it to A. */
1119 int val = INTVAL (XEXP (idx, 1));
1121 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1122 reg1 = XEXP (XEXP (idx, 0), 0);
1123 if (GET_CODE (reg1) != REG)
1124 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1126 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1128 /* We can now generate a simple scaled indexed address. */
1131 (Pmode, gen_rtx_PLUS (Pmode,
1132 gen_rtx_MULT (Pmode, reg1,
1133 XEXP (XEXP (idx, 0), 1)),
1137 /* If B + C is still a valid base register, then add them. */
1138 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1139 && INTVAL (XEXP (idx, 1)) <= 4096
1140 && INTVAL (XEXP (idx, 1)) >= -4096)
1142 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1145 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1147 reg2 = XEXP (XEXP (idx, 0), 0);
1148 if (GET_CODE (reg2) != CONST_INT)
1149 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1151 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1152 gen_rtx_MULT (Pmode,
1158 /* Get the index into a register, then add the base + index and
1159 return a register holding the result. */
1161 /* First get A into a register. */
1162 reg1 = XEXP (XEXP (idx, 0), 0);
1163 if (GET_CODE (reg1) != REG)
1164 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1166 /* And get B into a register. */
1167 reg2 = XEXP (idx, 1);
1168 if (GET_CODE (reg2) != REG)
1169 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1171 reg1 = force_reg (Pmode,
1172 gen_rtx_PLUS (Pmode,
1173 gen_rtx_MULT (Pmode, reg1,
1174 XEXP (XEXP (idx, 0), 1)),
1177 /* Add the result to our base register and return. */
1178 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1182 /* Uh-oh. We might have an address for x[n-100000]. This needs
1183 special handling to avoid creating an indexed memory address
1184 with x-100000 as the base.
1186 If the constant part is small enough, then it's still safe because
1187 there is a guard page at the beginning and end of the data segment.
1189 Scaled references are common enough that we want to try and rearrange the
1190 terms so that we can use indexing for these addresses too. Only
1191 do the optimization for floating point modes.  */
1193 if (GET_CODE (x) == PLUS
1194 && symbolic_expression_p (XEXP (x, 1)))
1196 /* Ugly. We modify things here so that the address offset specified
1197 by the index expression is computed first, then added to x to form
1198 the entire address. */
1200 rtx regx1, regx2, regy1, regy2, y;
1202 /* Strip off any CONST. */
1204 if (GET_CODE (y) == CONST)
1207 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1209 /* See if this looks like
1210 (plus (mult (reg) (shadd_const))
1211 (const (plus (symbol_ref) (const_int))))
1213 Where const_int is small. In that case the const
1214 expression is a valid pointer for indexing.
1216 If const_int is big, but can be divided evenly by shadd_const
1217 and added to (reg). This allows more scaled indexed addresses. */
1218 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1219 && GET_CODE (XEXP (x, 0)) == MULT
1220 && GET_CODE (XEXP (y, 1)) == CONST_INT
1221 && INTVAL (XEXP (y, 1)) >= -4096
1222 && INTVAL (XEXP (y, 1)) <= 4095
1223 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1224 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1226 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1230 if (GET_CODE (reg1) != REG)
1231 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1233 reg2 = XEXP (XEXP (x, 0), 0);
1234 if (GET_CODE (reg2) != REG)
1235 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1237 return force_reg (Pmode,
1238 gen_rtx_PLUS (Pmode,
1239 gen_rtx_MULT (Pmode,
1244 else if ((mode == DFmode || mode == SFmode)
1245 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1246 && GET_CODE (XEXP (x, 0)) == MULT
1247 && GET_CODE (XEXP (y, 1)) == CONST_INT
1248 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1249 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1250 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1253 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1254 / INTVAL (XEXP (XEXP (x, 0), 1))));
1255 regx2 = XEXP (XEXP (x, 0), 0);
1256 if (GET_CODE (regx2) != REG)
1257 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1258 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1262 gen_rtx_PLUS (Pmode,
1263 gen_rtx_MULT (Pmode, regx2,
1264 XEXP (XEXP (x, 0), 1)),
1265 force_reg (Pmode, XEXP (y, 0))));
1267 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1268 && INTVAL (XEXP (y, 1)) >= -4096
1269 && INTVAL (XEXP (y, 1)) <= 4095)
1271 /* This is safe because of the guard page at the
1272 beginning and end of the data space. Just
1273 return the original address. */
1278 /* Doesn't look like one we can optimize. */
1279 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1280 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1281 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1282 regx1 = force_reg (Pmode,
1283 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1285 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1293 /* For the HPPA, REG and REG+CONST is cost 0
1294 and addresses involving symbolic constants are cost 2.
1296 PIC addresses are very expensive.
1298 It is no coincidence that this has the same structure
1299 as GO_IF_LEGITIMATE_ADDRESS. */
/* Implement the address-cost hook: dispatch on the RTX code of address X.
   Per the comment above, REG and REG+CONST forms are cheapest and
   symbolic/PIC addresses are costlier; the case bodies are elsewhere.  */
1302 hppa_address_cost (rtx X)
1304   switch (GET_CODE (X))
1317 /* Compute a (partial) cost for rtx X. Return true if the complete
1318 cost has been computed, and false if subexpressions should be
1319 scanned. In either case, *TOTAL contains the cost result. */
/* Implement the rtx-costs hook for PA.  Write the (partial) cost of X into
   *TOTAL; see the comment above for the true/false return contract.
   OUTER_CODE is the code of the containing expression, used to special-case
   FP zero constants that appear outside a SET.  */
1322 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1327       if (INTVAL (x) == 0)
1329       else if (INT_14_BITS (x))
/* FP +0.0 is free except as the source of a SET (no FP immediate moves).  */
1346       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1347 	  && outer_code != SET)
/* Multiply: FP multiply costs 3 insns; integer multiply is cheaper when
   PA 1.1 FP hardware is usable (xmpyu path), very expensive otherwise
   (presumably a millicode call — confirm against pa.md).  */
1354       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1355 	*total = COSTS_N_INSNS (3);
1356       else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1357 	*total = COSTS_N_INSNS (8);
1359 	*total = COSTS_N_INSNS (20);
/* FP division costs 14 insns; the 60-insn case below is the integer
   variant (likely a millicode divide — TODO confirm).  */
1363       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1365 	  *total = COSTS_N_INSNS (14);
1373 	*total = COSTS_N_INSNS (60);
1376     case PLUS: /* this includes shNadd insns */
1378       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1379 	*total = COSTS_N_INSNS (3);
1381 	*total = COSTS_N_INSNS (1);
/* Shifts and similar single-cycle ALU operations.  */
1387       *total = COSTS_N_INSNS (1);
1395 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1396 new rtx with the correct mode. */
/* See the comment above: coerce REG rtx ORIG to MODE, returning ORIG
   unchanged when it already has that mode.  Only pseudo registers may be
   re-created in a new mode; remoding a hard register here would be wrong
   (hard regs are checked before gen_rtx_REG is reached).  */
1398 force_mode (enum machine_mode mode, rtx orig)
1400   if (mode == GET_MODE (orig))
1403   if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1406   return gen_rtx_REG (mode, REGNO (orig));
1409 /* Emit insns to move operands[1] into operands[0].
1411 Return 1 if we have written out everything that needs to be done to
1412 do the move. Otherwise, return 0 and the caller will emit the move
1415 Note SCRATCH_REG may not be in the proper mode depending on how it
1416 will be used. This routine is responsible for creating a new copy
1417 of SCRATCH_REG in the proper mode. */
/* See the block comment above for the contract.  OPERANDS[0] is the
   destination, OPERANDS[1] the source; SCRATCH_REG (possibly NULL) is a
   reload scratch whose mode is fixed up locally via force_mode.  */
1420 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1422   register rtx operand0 = operands[0];
1423   register rtx operand1 = operands[1];
/* During reload, replace unallocated pseudos (and SUBREGs of them) in
   either operand with their equivalent memory locations.  */
1427       && reload_in_progress && GET_CODE (operand0) == REG
1428       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1429     operand0 = reg_equiv_mem[REGNO (operand0)];
1430   else if (scratch_reg
1431 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1432 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1433 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1435       /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1436 	 the code which tracks sets/uses for delete_output_reload.  */
1437       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1438 				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1439 				 SUBREG_BYTE (operand0));
1440       operand0 = alter_subreg (&temp);
1444       && reload_in_progress && GET_CODE (operand1) == REG
1445       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1446     operand1 = reg_equiv_mem[REGNO (operand1)];
1447   else if (scratch_reg
1448 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1449 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1450 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1452       /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1453 	 the code which tracks sets/uses for delete_output_reload.  */
1454       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1455 				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1456 				 SUBREG_BYTE (operand1));
1457       operand1 = alter_subreg (&temp);
/* Substitute any pending reload replacements inside MEM addresses so the
   address checks below see the final address.  */
1460   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1461       && ((tem = find_replacement (&XEXP (operand0, 0)))
1462 	  != XEXP (operand0, 0)))
1463     operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1464   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1465       && ((tem = find_replacement (&XEXP (operand1, 0)))
1466 	  != XEXP (operand1, 0)))
1467     operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1469   /* Handle secondary reloads for loads/stores of FP registers from
1470      REG+D addresses where D does not fit in 5 bits, including
1471      (subreg (mem (addr))) cases.  */
1472   if (fp_reg_operand (operand0, mode)
1473       && ((GET_CODE (operand1) == MEM
1474 	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1475 	  || ((GET_CODE (operand1) == SUBREG
1476 	       && GET_CODE (XEXP (operand1, 0)) == MEM
1477 	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1480       if (GET_CODE (operand1) == SUBREG)
1481 	operand1 = XEXP (operand1, 0);
1483       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1484 	 it in WORD_MODE regardless of what mode it was originally given
1486       scratch_reg = force_mode (word_mode, scratch_reg);
1488       /* D might not fit in 14 bits either; for such cases load D into
1490       if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1492 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1493 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1495 						       XEXP (XEXP (operand1, 0), 0),
/* Load via the scratch: scratch <- address, then FP dest <- MEM(scratch).  */
1499 	emit_move_insn (scratch_reg, XEXP (operand1, 0));
1500       emit_insn (gen_rtx_SET (VOIDmode, operand0,
1501 			      gen_rtx_MEM (mode, scratch_reg)));
/* Mirror case: storing an FP register to a hard-to-address MEM.  */
1504   else if (fp_reg_operand (operand1, mode)
1505 	   && ((GET_CODE (operand0) == MEM
1506 		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
1507 	       || ((GET_CODE (operand0) == SUBREG)
1508 		   && GET_CODE (XEXP (operand0, 0)) == MEM
1509 		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1512       if (GET_CODE (operand0) == SUBREG)
1513 	operand0 = XEXP (operand0, 0);
1515       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1516 	 it in WORD_MODE regardless of what mode it was originally given
1518       scratch_reg = force_mode (word_mode, scratch_reg);
1520       /* D might not fit in 14 bits either; for such cases load D into
1522       if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1524 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1525 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1528 						       XEXP (XEXP (operand0, 0),
1533 	emit_move_insn (scratch_reg, XEXP (operand0, 0));
1534       emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1538   /* Handle secondary reloads for loads of FP registers from constant
1539      expressions by forcing the constant into memory.
1541      use scratch_reg to hold the address of the memory location.
1543      The proper fix is to change PREFERRED_RELOAD_CLASS to return
1544      NO_REGS when presented with a const_int and a register class
1545      containing only FP registers.  Doing so unfortunately creates
1546      more problems than it solves.   Fix this for 2.5.  */
1547   else if (fp_reg_operand (operand0, mode)
1548 	   && CONSTANT_P (operand1)
1553       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1554 	 it in WORD_MODE regardless of what mode it was originally given
1556       scratch_reg = force_mode (word_mode, scratch_reg);
1558       /* Force the constant into memory and put the address of the
1559 	 memory location into scratch_reg.  */
1560       xoperands[0] = scratch_reg;
1561       xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1562       emit_move_sequence (xoperands, Pmode, 0);
1564       /* Now load the destination register.  */
1565       emit_insn (gen_rtx_SET (mode, operand0,
1566 			      gen_rtx_MEM (mode, scratch_reg)));
1569   /* Handle secondary reloads for SAR.  These occur when trying to load
1570      the SAR from memory, FP register, or with a constant.  */
1571   else if (GET_CODE (operand0) == REG
1572 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1573 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1574 	   && (GET_CODE (operand1) == MEM
1575 	       || GET_CODE (operand1) == CONST_INT
1576 	       || (GET_CODE (operand1) == REG
1577 		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1580       /* D might not fit in 14 bits either; for such cases load D into
1582       if (GET_CODE (operand1) == MEM
1583 	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
1585 	  /* We are reloading the address into the scratch register, so we
1586 	     want to make sure the scratch register is a full register.  */
1587 	  scratch_reg = force_mode (word_mode, scratch_reg);
1589 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1590 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1593 						       XEXP (XEXP (operand1, 0),
1597 	  /* Now we are going to load the scratch register from memory,
1598 	     we want to load it in the same width as the original MEM,
1599 	     which must be the same as the width of the ultimate destination,
1601 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1603 	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1608 	  /* We want to load the scratch register using the same mode as
1609 	     the ultimate destination.  */
1610 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1612 	  emit_move_insn (scratch_reg, operand1);
1615       /* And emit the insn to set the ultimate destination.  We know that
1616 	 the scratch register has the same mode as the destination at this
1618       emit_move_insn (operand0, scratch_reg);
1621   /* Handle most common case: storing into a register.  */
1622   else if (register_operand (operand0, mode))
1624       if (register_operand (operand1, mode)
1625 	  || (GET_CODE (operand1) == CONST_INT
1626 	      && cint_ok_for_move (INTVAL (operand1)))
1627 	  || (operand1 == CONST0_RTX (mode))
1628 	  || (GET_CODE (operand1) == HIGH
1629 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1630 	  /* Only `general_operands' can come here, so MEM is ok.  */
1631 	  || GET_CODE (operand1) == MEM)
1633 	  /* Run this case quickly.  */
1634 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1638   else if (GET_CODE (operand0) == MEM)
/* Storing DFmode zero: go through a pseudo so the FP zero store can be
   optimized, but only before/after reload (no new pseudos during it).  */
1640       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1641 	  && !(reload_in_progress || reload_completed))
1643 	  rtx temp = gen_reg_rtx (DFmode);
1645 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1646 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1649       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1651 	  /* Run this case quickly.  */
1652 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1655       if (! (reload_in_progress || reload_completed))
1657 	  operands[0] = validize_mem (operand0);
1658 	  operands[1] = operand1 = force_reg (mode, operand1);
1662   /* Simplify the source if we need to.
1663      Note we do have to handle function labels here, even though we do
1664      not consider them legitimate constants.  Loop optimizations can
1665      call the emit_move_xxx with one as a source.  */
1666   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1667       || function_label_operand (operand1, mode)
1668       || (GET_CODE (operand1) == HIGH
1669 	  && symbolic_operand (XEXP (operand1, 0), mode)))
1673       if (GET_CODE (operand1) == HIGH)
1676 	  operand1 = XEXP (operand1, 0);
1678       if (symbolic_operand (operand1, mode))
1680 	  /* Argh.  The assembler and linker can't handle arithmetic
1683 	     So we force the plabel into memory, load operand0 from
1684 	     the memory location, then add in the constant part.  */
1685 	  if ((GET_CODE (operand1) == CONST
1686 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1687 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1688 	      || function_label_operand (operand1, mode))
1690 	      rtx temp, const_part;
1692 	      /* Figure out what (if any) scratch register to use.  */
1693 	      if (reload_in_progress || reload_completed)
1695 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1696 		  /* SCRATCH_REG will hold an address and maybe the actual
1697 		     data.  We want it in WORD_MODE regardless of what mode it
1698 		     was originally given to us.  */
1699 		  scratch_reg = force_mode (word_mode, scratch_reg);
1702 		scratch_reg = gen_reg_rtx (Pmode);
1704 	      if (GET_CODE (operand1) == CONST)
1706 		  /* Save away the constant part of the expression.  */
1707 		  const_part = XEXP (XEXP (operand1, 0), 1);
1708 		  if (GET_CODE (const_part) != CONST_INT)
1711 		  /* Force the function label into memory.  */
1712 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1716 		  /* No constant part.  */
1717 		  const_part = NULL_RTX;
1719 		  /* Force the function label into memory.  */
1720 		  temp = force_const_mem (mode, operand1);
1724 	      /* Get the address of the memory location.  PIC-ify it if
1726 	      temp = XEXP (temp, 0);
1728 		temp = legitimize_pic_address (temp, mode, scratch_reg);
1730 	      /* Put the address of the memory location into our destination
1733 	      emit_move_sequence (operands, mode, scratch_reg);
1735 	      /* Now load from the memory location into our destination
1737 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1738 	      emit_move_sequence (operands, mode, scratch_reg);
1740 	      /* And add back in the constant part.  */
1741 	      if (const_part != NULL_RTX)
1742 		expand_inc (operand0, const_part);
/* PIC symbolic source.  */
1751 	      if (reload_in_progress || reload_completed)
1753 		  temp = scratch_reg ? scratch_reg : operand0;
1754 		  /* TEMP will hold an address and maybe the actual
1755 		     data.  We want it in WORD_MODE regardless of what mode it
1756 		     was originally given to us.  */
1757 		  temp = force_mode (word_mode, temp);
1760 		temp = gen_reg_rtx (Pmode);
1762 	      /* (const (plus (symbol) (const_int))) must be forced to
1763 		 memory during/after reload if the const_int will not fit
1765 	      if (GET_CODE (operand1) == CONST
1766 		  && GET_CODE (XEXP (operand1, 0)) == PLUS
1767 		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1768 		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1769 		  && (reload_completed || reload_in_progress)
1772 		  operands[1] = force_const_mem (mode, operand1);
1773 		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1775 		  emit_move_sequence (operands, mode, temp);
1779 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
1780 		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1783 	  /* On the HPPA, references to data space are supposed to use dp,
1784 	     register 27, but showing it in the RTL inhibits various cse
1785 	     and loop optimizations.  */
1790 	      if (reload_in_progress || reload_completed)
1792 		  temp = scratch_reg ? scratch_reg : operand0;
1793 		  /* TEMP will hold an address and maybe the actual
1794 		     data.  We want it in WORD_MODE regardless of what mode it
1795 		     was originally given to us.  */
1796 		  temp = force_mode (word_mode, temp);
1799 		temp = gen_reg_rtx (mode);
1801 	      /* Loading a SYMBOL_REF into a register makes that register
1802 		 safe to be used as the base in an indexed address.
1804 		 Don't mark hard registers though.  That loses.  */
1805 	      if (GET_CODE (operand0) == REG
1806 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1807 		REG_POINTER (operand0) = 1;
1808 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1809 		REG_POINTER (temp) = 1;
1811 		set = gen_rtx_SET (mode, operand0, temp);
1813 		set = gen_rtx_SET (VOIDmode,
1815 				   gen_rtx_LO_SUM (mode, temp, operand1));
/* Emit the HIGH/LO_SUM pair: temp <- HIGH(sym), dest <- LO_SUM(temp, sym).  */
1817 	      emit_insn (gen_rtx_SET (VOIDmode,
1819 				      gen_rtx_HIGH (mode, operand1)));
/* Source is a constant that cannot move in a single insn.  */
1825   else if (GET_CODE (operand1) != CONST_INT
1826 	   || ! cint_ok_for_move (INTVAL (operand1)))
1828       rtx extend = NULL_RTX;
/* On 64-bit targets, split a wide constant into a sign-extended low 32
   bits plus (via insv below) the original high 32 bits when they differ.  */
1831       if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1832 	  && HOST_BITS_PER_WIDE_INT > 32
1833 	  && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1835 	  HOST_WIDE_INT val = INTVAL (operand1);
1838 	  /* Extract the low order 32 bits of the value and sign extend.
1839 	     If the new value is the same as the original value, we can
1840 	     use the original value as-is.  If the new value is
1841 	     different, we use it and insert the most-significant 32-bits
1842 	     of the original value into the final result.  */
1843 	  nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1844 		  ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1847 #if HOST_BITS_PER_WIDE_INT > 32
1848 	      extend = GEN_INT (val >> 32);
1850 	      operand1 = GEN_INT (nval);
1854       if (reload_in_progress || reload_completed)
1857 	temp = gen_reg_rtx (mode);
1859       /* We don't directly split DImode constants on 32-bit targets
1860 	 because PLUS uses an 11-bit immediate and the insn sequence
1861 	 generated is not as efficient as the one using HIGH/LO_SUM.  */
1862       if (GET_CODE (operand1) == CONST_INT
1863 	  && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
1865 	  /* Directly break constant into high and low parts.  This
1866 	     provides better optimization opportunities because various
1867 	     passes recognize constants split with PLUS but not LO_SUM.
1868 	     We use a 14-bit signed low part except when the addition
1869 	     of 0x4000 to the high part might change the sign of the
1871 	  HOST_WIDE_INT value = INTVAL (operand1);
1872 	  HOST_WIDE_INT low = value & 0x3fff;
1873 	  HOST_WIDE_INT high = value & ~ 0x3fff;
1877 	  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1885 	  emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1886 	  operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1890 	  emit_insn (gen_rtx_SET (VOIDmode, temp,
1891 				  gen_rtx_HIGH (mode, operand1)));
1892 	  operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1895       emit_move_insn (operands[0], operands[1]);
/* Patch in the high 32 bits saved earlier (64-bit split case).  */
1897       if (extend != NULL_RTX)
1898 	emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1904   /* Now have insn-emit do whatever it normally does.  */
1908 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1909 it will need a link/runtime reloc). */
/* See the comment above: recursively walk tree EXP and return nonzero if
   it contains anything (e.g. an ADDR_EXPR) needing a link/runtime reloc.
   Binary nodes OR the results of both operands; unary wrappers such as
   NON_LVALUE_EXPR recurse into their single operand; CONSTRUCTORs scan
   every element value.  */
1912 reloc_needed (tree exp)
1916   switch (TREE_CODE (exp))
1923       reloc = reloc_needed (TREE_OPERAND (exp, 0));
1924       reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1929     case NON_LVALUE_EXPR:
1930       reloc = reloc_needed (TREE_OPERAND (exp, 0));
1936 	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1937 	  if (TREE_VALUE (link) != 0)
1938 	    reloc |= reloc_needed (TREE_VALUE (link));
1951 /* Does operand (which is a symbolic_operand) live in text space?
1952 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
/* See the comment above: decide whether symbolic OPERAND lives in
   read-only text space, stripping a CONST wrapper first.  The two
   SYMBOL_REF tests differ in how constant-pool entries are treated
   (the branch condition between them is not visible here — it
   presumably depends on whether the constant pool is in text space;
   confirm against flag_pic/TARGET handling in pa.c).  */
1956 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
1958   if (GET_CODE (operand) == CONST)
1959     operand = XEXP (XEXP (operand, 0), 0);
1962       if (GET_CODE (operand) == SYMBOL_REF)
1963 	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1967       if (GET_CODE (operand) == SYMBOL_REF)
1968 	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1974 /* Return the best assembler insn template
1975 for moving operands[1] into operands[0] as a fullword. */
/* See the comment above: pick the assembler template that moves
   operands[1] into operands[0] as one word.  MEM destinations get a
   store, MEM sources a load; SFmode CONST_DOUBLEs are converted to an
   equivalent CONST_INT bit pattern and fall through; CONST_INTs choose
   among ldi/ldil/zdepi/ldil+ldo by value; anything else is a copy.  */
1977 singlemove_string (rtx *operands)
1979   HOST_WIDE_INT intval;
1981   if (GET_CODE (operands[0]) == MEM)
1982     return "stw %r1,%0";
1983   if (GET_CODE (operands[1]) == MEM)
1985   if (GET_CODE (operands[1]) == CONST_DOUBLE)
1990       if (GET_MODE (operands[1]) != SFmode)
1993       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1995       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1996       REAL_VALUE_TO_TARGET_SINGLE (d, i);
1998       operands[1] = GEN_INT (i);
1999       /* Fall through to CONST_INT case.  */
2001   if (GET_CODE (operands[1]) == CONST_INT)
2003       intval = INTVAL (operands[1]);
2005       if (VAL_14_BITS_P (intval))
2007       else if ((intval & 0x7ff) == 0)
2008 	return "ldil L'%1,%0";
2009       else if (zdepi_cint_p (intval))
2010 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2012 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2014   return "copy %1,%0";
2018 /* Compute position (in OP[1]) and width (in OP[2])
2019 useful for copying IMM to a register using the zdepi
2020 instructions. Store the immediate value to insert in OP[0]. */
/* See the comment above: derive the zdepi operands for 32-bit IMM.
   OP[0] receives the sign-extended 5-bit immediate, OP[1] the position,
   OP[2] the field width.  */
2022 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2026   /* Find the least significant set bit in IMM.  */
2027   for (lsb = 0; lsb < 32; lsb++)
2034   /* Choose variants based on *sign* of the 5-bit field.  */
2035   if ((imm & 0x10) == 0)
2036     len = (lsb <= 28) ? 4 : 32 - lsb;
2039       /* Find the width of the bitstring in IMM.  */
2040       for (len = 5; len < 32; len++)
2042 	  if ((imm & (1 << len)) == 0)
2046       /* Sign extend IMM as a 5-bit value.  */
2047       imm = (imm & 0xf) - 0x10;
2055 /* Compute position (in OP[1]) and width (in OP[2])
2056 useful for copying IMM to a register using the depdi,z
2057 instructions. Store the immediate value to insert in OP[0]. */
/* See the comment above: 64-bit (depdi,z) counterpart of
   compute_zdepwi_operands, parameterized on HOST_BITS_PER_WIDE_INT
   instead of the constant 32.  */
2059 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2061   HOST_WIDE_INT lsb, len;
2063   /* Find the least significant set bit in IMM.  */
2064   for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2071   /* Choose variants based on *sign* of the 5-bit field.  */
2072   if ((imm & 0x10) == 0)
2073     len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2074 	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2077       /* Find the width of the bitstring in IMM.  */
2078       for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2080 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2084       /* Sign extend IMM as a 5-bit value.  */
2085       imm = (imm & 0xf) - 0x10;
2093 /* Output assembler code to perform a doubleword move insn
2094 with operands OPERANDS. */
/* See the comment above: emit assembler for a doubleword (two-word) move.
   Classifies both operands, special-cases auto-inc/dec and scaled-index
   addresses, then emits two singlemove_string moves in whichever order
   avoids clobbering a source still needed by the second move.  */
2097 output_move_double (rtx *operands)
2099   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2101   rtx addreg0 = 0, addreg1 = 0;
2103   /* First classify both operands.  */
2105   if (REG_P (operands[0]))
2107   else if (offsettable_memref_p (operands[0]))
2109   else if (GET_CODE (operands[0]) == MEM)
2114   if (REG_P (operands[1]))
2116   else if (CONSTANT_P (operands[1]))
2118   else if (offsettable_memref_p (operands[1]))
2120   else if (GET_CODE (operands[1]) == MEM)
2125   /* Check for the cases that the operand constraints are not
2126      supposed to allow to happen.  Abort if we get one,
2127      because generating code for these cases is painful.  */
2129   if (optype0 != REGOP && optype1 != REGOP)
2132   /* Handle auto decrementing and incrementing loads and stores
2133      specifically, since the structure of the function doesn't work
2134      for them without major modification.  Do it better when we learn
2135      this port about the general inc/dec addressing of PA.
2136      (This was written by tege.  Chide him if it doesn't work.)  */
2138   if (optype0 == MEMOP)
2140       /* We have to output the address syntax ourselves, since print_operand
2141 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2143       rtx addr = XEXP (operands[0], 0);
2144       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2146 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2148 	  operands[0] = XEXP (addr, 0);
2149 	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2152 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2154 	      /* No overlap between high target register and address
2155 		 register.  (We do this in a non-obvious way to
2156 		 save a register file writeback)  */
2157 	      if (GET_CODE (addr) == POST_INC)
2158 		return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2159 	      return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2164       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2166 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2168 	  operands[0] = XEXP (addr, 0);
2169 	  if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2172 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2174 	      /* No overlap between high target register and address
2175 		 register.  (We do this in a non-obvious way to
2176 		 save a register file writeback)  */
2177 	      if (GET_CODE (addr) == PRE_INC)
2178 		return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2179 	      return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
/* Same auto-inc/dec handling for a MEM source.  */
2185   if (optype1 == MEMOP)
2187       /* We have to output the address syntax ourselves, since print_operand
2188 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2190       rtx addr = XEXP (operands[1], 0);
2191       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2193 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2195 	  operands[1] = XEXP (addr, 0);
2196 	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2199 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2201 	      /* No overlap between high target register and address
2202 		 register.  (We do this in a non-obvious way to
2203 		 save a register file writeback)  */
2204 	      if (GET_CODE (addr) == POST_INC)
2205 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2206 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2210 	  /* This is an undefined situation.  We should load into the
2211 	     address register *and* update that register.  Probably
2212 	     we don't need to handle this at all.  */
2213 	  if (GET_CODE (addr) == POST_INC)
2214 	    return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2215 	  return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2218       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2220 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2222 	  operands[1] = XEXP (addr, 0);
2223 	  if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2226 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2228 	      /* No overlap between high target register and address
2229 		 register.  (We do this in a non-obvious way to
2230 		 save a register file writeback)  */
2231 	      if (GET_CODE (addr) == PRE_INC)
2232 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2233 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2237 	  /* This is an undefined situation.  We should load into the
2238 	     address register *and* update that register.  Probably
2239 	     we don't need to handle this at all.  */
2240 	  if (GET_CODE (addr) == PRE_INC)
2241 	    return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2242 	  return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
/* Scaled-index (plus (mult ...) base) source: materialize the address
   with shNadd into whichever half-register the load won't clobber first.  */
2245       else if (GET_CODE (addr) == PLUS
2246 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2248 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2250 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2254 	      xoperands[0] = high_reg;
2255 	      xoperands[1] = XEXP (addr, 1);
2256 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2257 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2258 	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2260 	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2266 	      xoperands[0] = high_reg;
2267 	      xoperands[1] = XEXP (addr, 1);
2268 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2269 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2270 	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2272 	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2277   /* If an operand is an unoffsettable memory ref, find a register
2278      we can increment temporarily to make it refer to the second word.  */
2280   if (optype0 == MEMOP)
2281     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2283   if (optype1 == MEMOP)
2284     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2286   /* Ok, we can do one word at a time.
2287      Normally we do the low-numbered word first.
2289      In either case, set up in LATEHALF the operands to use
2290      for the high-numbered word and in some cases alter the
2291      operands in OPERANDS to be suitable for the low-numbered word.  */
2293   if (optype0 == REGOP)
2294     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2295   else if (optype0 == OFFSOP)
2296     latehalf[0] = adjust_address (operands[0], SImode, 4);
2298     latehalf[0] = operands[0];
2300   if (optype1 == REGOP)
2301     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2302   else if (optype1 == OFFSOP)
2303     latehalf[1] = adjust_address (operands[1], SImode, 4);
2304   else if (optype1 == CNSTOP)
2305     split_double (operands[1], &operands[1], &latehalf[1]);
2307     latehalf[1] = operands[1];
2309   /* If the first move would clobber the source of the second one,
2310      do them in the other order.
2312      This can happen in two cases:
2314 	mem -> register where the first half of the destination register
2315 	is the same register used in the memory's address.  Reload
2316 	can create such insns.
2318 	mem in this case will be either register indirect or register
2319 	indirect plus a valid offset.
2321 	register -> register move where REGNO(dst) == REGNO(src + 1)
2322 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2324      Handle mem -> register case first.  */
2325   if (optype0 == REGOP
2326       && (optype1 == MEMOP || optype1 == OFFSOP)
2327       && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2330       /* Do the late half first.  */
2332 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2333       output_asm_insn (singlemove_string (latehalf), latehalf);
2337 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2338       return singlemove_string (operands);
2341   /* Now handle register -> register case.  */
2342   if (optype0 == REGOP && optype1 == REGOP
2343       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2345       output_asm_insn (singlemove_string (latehalf), latehalf);
2346       return singlemove_string (operands);
2349   /* Normal case: do the two words, low-numbered first.  */
2351   output_asm_insn (singlemove_string (operands), operands);
2353   /* Make any unoffsettable addresses point at high-numbered word.  */
2355     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2357     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2360   output_asm_insn (singlemove_string (latehalf), latehalf);
2362   /* Undo the adds we just did.  */
2364     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2366     output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Emit assembler for a doubleword FP move: FP-to-FP copy (fcpy,dbl),
   FP load (fldd) / store (fstd), or zeroing a general-register pair
   with two integer copies when the destination is not an FP register.  */
2372 output_fp_move_double (rtx *operands)
2374   if (FP_REG_P (operands[0]))
2376       if (FP_REG_P (operands[1])
2377 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2378 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2380 	output_asm_insn ("fldd%F1 %1,%0", operands);
2382   else if (FP_REG_P (operands[1]))
2384       output_asm_insn ("fstd%F0 %1,%0", operands);
2386   else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2388       if (GET_CODE (operands[0]) == REG)
2391 	  xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2392 	  xoperands[0] = operands[0];
2393 	  output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2395       /* This is a pain.  You have to be prepared to deal with an
2396 	 arbitrary address here including pre/post increment/decrement.
2398 	 so avoid this in the MD.  */
2406 /* Return a REG that occurs in ADDR with coefficient 1.
2407    ADDR can be effectively incremented by incrementing REG.  */
2410 find_addr_reg (rtx addr)
     /* Walk down nested PLUS terms, preferring a REG operand and
	discarding constant terms, until a non-PLUS rtx remains.  */
2412   while (GET_CODE (addr) == PLUS)
2414       if (GET_CODE (XEXP (addr, 0)) == REG)
2415 	addr = XEXP (addr, 0);
2416       else if (GET_CODE (XEXP (addr, 1)) == REG)
2417 	addr = XEXP (addr, 1);
2418       else if (CONSTANT_P (XEXP (addr, 0)))
2419 	addr = XEXP (addr, 1);
2420       else if (CONSTANT_P (XEXP (addr, 1)))
2421 	addr = XEXP (addr, 0);
2425   if (GET_CODE (addr) == REG)
2430 /* Emit code to perform a block move.
2432    OPERANDS[0] is the destination pointer as a REG, clobbered.
2433    OPERANDS[1] is the source pointer as a REG, clobbered.
2434    OPERANDS[2] is a register for temporary storage.
2435    OPERANDS[3] is a register for temporary storage.
2436    OPERANDS[4] is the size as a CONST_INT
2437    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2438    OPERANDS[6] is another temporary register.  */
2441 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2443   int align = INTVAL (operands[5]);
2444   unsigned long n_bytes = INTVAL (operands[4]);
2446   /* We can't move more than a word at a time because the PA
2447      has no longer integer move insns.  (Could use fp mem ops?)  */
2448   if (align > (TARGET_64BIT ? 8 : 4))
2449     align = (TARGET_64BIT ? 8 : 4);
2451   /* Note that we know each loop below will execute at least twice
2452      (else we would have open-coded the copy).  */
     /* Doubleword (8-byte aligned) copy: 16 bytes per iteration.  */
2456       /* Pre-adjust the loop counter.  */
2457       operands[4] = GEN_INT (n_bytes - 16);
2458       output_asm_insn ("ldi %4,%2", operands);
     /* Two loads then two stores per iteration; the addib decrements
	the counter and branches back over the preceding three insns
	(.-12), with the final store in the delay slot.  */
2461       output_asm_insn ("ldd,ma 8(%1),%3", operands);
2462       output_asm_insn ("ldd,ma 8(%1),%6", operands);
2463       output_asm_insn ("std,ma %3,8(%0)", operands);
2464       output_asm_insn ("addib,>= -16,%2,.-12", operands);
2465       output_asm_insn ("std,ma %6,8(%0)", operands);
2467       /* Handle the residual.  There could be up to 7 bytes of
2468 	 residual to copy!  */
2469       if (n_bytes % 16 != 0)
2471 	  operands[4] = GEN_INT (n_bytes % 8);
2472 	  if (n_bytes % 16 >= 8)
2473 	    output_asm_insn ("ldd,ma 8(%1),%3", operands);
2474 	  if (n_bytes % 8 != 0)
2475 	    output_asm_insn ("ldd 0(%1),%6", operands);
2476 	  if (n_bytes % 16 >= 8)
2477 	    output_asm_insn ("std,ma %3,8(%0)", operands);
2478 	  if (n_bytes % 8 != 0)
2479 	    output_asm_insn ("stdby,e %6,%4(%0)", operands);
     /* Word (4-byte aligned) copy: 8 bytes per iteration.  */
2484       /* Pre-adjust the loop counter.  */
2485       operands[4] = GEN_INT (n_bytes - 8);
2486       output_asm_insn ("ldi %4,%2", operands);
2489       output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2490       output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2491       output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2492       output_asm_insn ("addib,>= -8,%2,.-12", operands);
2493       output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2495       /* Handle the residual.  There could be up to 7 bytes of
2496 	 residual to copy!  */
2497       if (n_bytes % 8 != 0)
2499 	  operands[4] = GEN_INT (n_bytes % 4);
2500 	  if (n_bytes % 8 >= 4)
2501 	    output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2502 	  if (n_bytes % 4 != 0)
2503 	    output_asm_insn ("ldw 0(%1),%6", operands);
2504 	  if (n_bytes % 8 >= 4)
2505 	    output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2506 	  if (n_bytes % 4 != 0)
2507 	    output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
     /* Halfword (2-byte aligned) copy: 4 bytes per iteration.  */
2512       /* Pre-adjust the loop counter.  */
2513       operands[4] = GEN_INT (n_bytes - 4);
2514       output_asm_insn ("ldi %4,%2", operands);
2517       output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2518       output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2519       output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2520       output_asm_insn ("addib,>= -4,%2,.-12", operands);
2521       output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2523       /* Handle the residual.  */
2524       if (n_bytes % 4 != 0)
2526 	  if (n_bytes % 4 >= 2)
2527 	    output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2528 	  if (n_bytes % 2 != 0)
2529 	    output_asm_insn ("ldb 0(%1),%6", operands);
2530 	  if (n_bytes % 4 >= 2)
2531 	    output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2532 	  if (n_bytes % 2 != 0)
2533 	    output_asm_insn ("stb %6,0(%0)", operands);
     /* Byte copy: 2 bytes per iteration.  */
2538       /* Pre-adjust the loop counter.  */
2539       operands[4] = GEN_INT (n_bytes - 2);
2540       output_asm_insn ("ldi %4,%2", operands);
2543       output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2544       output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2545       output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2546       output_asm_insn ("addib,>= -2,%2,.-12", operands);
2547       output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2549       /* Handle the residual.  */
2550       if (n_bytes % 2 != 0)
2552 	  output_asm_insn ("ldb 0(%1),%3", operands);
2553 	  output_asm_insn ("stb %3,0(%0)", operands);
2562 /* Count the number of insns necessary to handle this block move.
2564    Basic structure is the same as emit_block_move, except that we
2565    count insns rather than emit them.  */
2568 compute_movstr_length (rtx insn)
2570   rtx pat = PATTERN (insn);
     /* Alignment and byte count come from the movstr pattern operands.  */
2571   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2572   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2573   unsigned int n_insns = 0;
2575   /* We can't move more than a word at a time because the PA
2576      has no longer integer move insns.  (Could use fp mem ops?)  */
2577   if (align > (TARGET_64BIT ? 8 : 4))
2578     align = (TARGET_64BIT ? 8 : 4);
2580   /* The basic copying loop.  */
     /* Residual handling mirrors output_block_move: one load/store
	pair for a leftover ALIGN-sized chunk, another for the
	sub-ALIGN tail.  */
2584   if (n_bytes % (2 * align) != 0)
2586       if ((n_bytes % (2 * align)) >= align)
2589       if ((n_bytes % align) != 0)
2593   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2597 /* Emit code to perform a block clear.
2599    OPERANDS[0] is the destination pointer as a REG, clobbered.
2600    OPERANDS[1] is a register for temporary storage.
2601    OPERANDS[2] is the size as a CONST_INT
2602    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2605 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2607   int align = INTVAL (operands[3]);
2608   unsigned long n_bytes = INTVAL (operands[2]);
2610   /* We can't clear more than a word at a time because the PA
2611      has no longer integer move insns.  */
2612   if (align > (TARGET_64BIT ? 8 : 4))
2613     align = (TARGET_64BIT ? 8 : 4);
2615   /* Note that we know each loop below will execute at least twice
2616      (else we would have open-coded the copy).  */
     /* Doubleword clear: 16 bytes per iteration; %r0 supplies zero.  */
2620       /* Pre-adjust the loop counter.  */
2621       operands[2] = GEN_INT (n_bytes - 16);
2622       output_asm_insn ("ldi %2,%1", operands);
2625       output_asm_insn ("std,ma %%r0,8(%0)", operands);
2626       output_asm_insn ("addib,>= -16,%1,.-4", operands);
2627       output_asm_insn ("std,ma %%r0,8(%0)", operands);
2629       /* Handle the residual.  There could be up to 7 bytes of
2630 	 residual to clear!  */
2631       if (n_bytes % 16 != 0)
2633 	  operands[2] = GEN_INT (n_bytes % 8);
2634 	  if (n_bytes % 16 >= 8)
2635 	    output_asm_insn ("std,ma %%r0,8(%0)", operands);
2636 	  if (n_bytes % 8 != 0)
2637 	    output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
     /* Word clear: 8 bytes per iteration.  */
2642       /* Pre-adjust the loop counter.  */
2643       operands[2] = GEN_INT (n_bytes - 8);
2644       output_asm_insn ("ldi %2,%1", operands);
2647       output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2648       output_asm_insn ("addib,>= -8,%1,.-4", operands);
2649       output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2651       /* Handle the residual.  There could be up to 7 bytes of
2652 	 residual to clear!  */
2653       if (n_bytes % 8 != 0)
2655 	  operands[2] = GEN_INT (n_bytes % 4);
2656 	  if (n_bytes % 8 >= 4)
2657 	    output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2658 	  if (n_bytes % 4 != 0)
2659 	    output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
     /* Halfword clear: 4 bytes per iteration.  */
2664       /* Pre-adjust the loop counter.  */
2665       operands[2] = GEN_INT (n_bytes - 4);
2666       output_asm_insn ("ldi %2,%1", operands);
2669       output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2670       output_asm_insn ("addib,>= -4,%1,.-4", operands);
2671       output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2673       /* Handle the residual.  */
2674       if (n_bytes % 4 != 0)
2676 	  if (n_bytes % 4 >= 2)
2677 	    output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2678 	  if (n_bytes % 2 != 0)
2679 	    output_asm_insn ("stb %%r0,0(%0)", operands);
     /* Byte clear: 2 bytes per iteration.  */
2684       /* Pre-adjust the loop counter.  */
2685       operands[2] = GEN_INT (n_bytes - 2);
2686       output_asm_insn ("ldi %2,%1", operands);
2689       output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2690       output_asm_insn ("addib,>= -2,%1,.-4", operands);
2691       output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2693       /* Handle the residual.  */
2694       if (n_bytes % 2 != 0)
2695 	output_asm_insn ("stb %%r0,0(%0)", operands);
2704 /* Count the number of insns necessary to handle this block clear.
2706    Basic structure is the same as emit_block_move, except that we
2707    count insns rather than emit them.  */
2710 compute_clrstr_length (rtx insn)
2712   rtx pat = PATTERN (insn);
     /* Alignment and byte count come from the clrstr pattern operands.  */
2713   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2714   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2715   unsigned int n_insns = 0;
2717   /* We can't clear more than a word at a time because the PA
2718      has no longer integer move insns.  */
2719   if (align > (TARGET_64BIT ? 8 : 4))
2720     align = (TARGET_64BIT ? 8 : 4);
2722   /* The basic loop.  */
     /* Residual handling mirrors output_block_clear: one store for a
	leftover ALIGN-sized chunk, another for the sub-ALIGN tail.  */
2726   if (n_bytes % (2 * align) != 0)
2728       if ((n_bytes % (2 * align)) >= align)
2731       if ((n_bytes % align) != 0)
2735   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
/* Return a string to perform a 32-bit bitwise-and of operands[1] with
   operands[2], storing the result in operands[0].  Masks that are a
   single contiguous run of bits are emitted as an extract or deposit
   insn; everything else falls back to a plain "and".  */
2741 output_and (rtx *operands)
2743   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2745       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2746       int ls0, ls1, ms0, p, len;
     /* ls0 = lowest zero bit, ls1 = next one bit above it,
	ms0 = next zero bit above that.  */
2748       for (ls0 = 0; ls0 < 32; ls0++)
2749 	if ((mask & (1 << ls0)) == 0)
2752       for (ls1 = ls0; ls1 < 32; ls1++)
2753 	if ((mask & (1 << ls1)) != 0)
2756       for (ms0 = ls1; ms0 < 32; ms0++)
2757 	if ((mask & (1 << ms0)) == 0)
2770 	  operands[2] = GEN_INT (len);
2771 	  return "{extru|extrw,u} %1,31,%2,%0";
2775 	  /* We could use this `depi' for the case above as well, but `depi'
2776 	     requires one more register file access than an `extru'.  */
2781 	  operands[2] = GEN_INT (p);
2782 	  operands[3] = GEN_INT (len);
2783 	  return "{depi|depwi} 0,%2,%3,%0";
2787     return "and %1,%2,%0";
2790 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2791    storing the result in operands[0].  */
2793 output_64bit_and (rtx *operands)
2795   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2797       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2798       int ls0, ls1, ms0, p, len;
     /* 64-bit analogue of output_and: ls0 = lowest zero bit,
	ls1 = next one bit, ms0 = next zero bit.  */
2800       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2801 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2804       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2805 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2808       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2809 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2812       if (ms0 != HOST_BITS_PER_WIDE_INT)
2815       if (ls1 == HOST_BITS_PER_WIDE_INT)
2822 	  operands[2] = GEN_INT (len);
2823 	  return "extrd,u %1,63,%2,%0";
2827 	  /* We could use this `depi' for the case above as well, but `depi'
2828 	     requires one more register file access than an `extru'.  */
2833 	  operands[2] = GEN_INT (p);
2834 	  operands[3] = GEN_INT (len);
2835 	  return "depdi 0,%2,%3,%0";
2839     return "and %1,%2,%0";
/* Return a string to perform a 32-bit inclusive-or of operands[1] with
   the CONST_INT operands[2], storing the result in operands[0].  A zero
   mask degenerates to a copy; a contiguous run of one bits is emitted
   as a single depi insn.  */
2843 output_ior (rtx *operands)
2845   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2846   int bs0, bs1, p, len;
2848   if (INTVAL (operands[2]) == 0)
2849     return "copy %1,%0";
     /* bs0 = lowest set bit, bs1 = next clear bit above it.  */
2851   for (bs0 = 0; bs0 < 32; bs0++)
2852     if ((mask & (1 << bs0)) != 0)
2855   for (bs1 = bs0; bs1 < 32; bs1++)
2856     if ((mask & (1 << bs1)) == 0)
     /* The mask must be a single contiguous run of set bits.  */
2859   if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2865   operands[2] = GEN_INT (p);
2866   operands[3] = GEN_INT (len);
2867   return "{depi|depwi} -1,%2,%3,%0";
2870 /* Return a string to perform a bitwise inclusive-or of operands[1] with
2871    operands[2] storing the result in operands[0].  */
/* 64-bit analogue of output_ior: zero mask degenerates to a copy; a
   contiguous run of one bits is emitted as a single depdi insn.  */
2873 output_64bit_ior (rtx *operands)
2875   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2876   int bs0, bs1, p, len;
2878   if (INTVAL (operands[2]) == 0)
2879     return "copy %1,%0";
     /* bs0 = lowest set bit, bs1 = next clear bit above it.  */
2881   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2882     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2885   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2886     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
     /* The mask must be a single contiguous run of set bits.  */
2889   if (bs1 != HOST_BITS_PER_WIDE_INT
2890       && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2896   operands[2] = GEN_INT (p);
2897   operands[3] = GEN_INT (len);
2898   return "depdi -1,%2,%3,%0";
2901 /* Target hook for assembling integer objects.  This code handles
2902    aligned SI and DI integers specially, since function references must
2903    be preceded by P%.  */
2906 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
2908   if (size == UNITS_PER_WORD && aligned_p
2909       && function_label_operand (x, VOIDmode))
     /* Emit a plabel reference: .dword for 8-byte words, .word else.  */
2911       fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2912       output_addr_const (asm_out_file, x);
2913       fputc ('\n', asm_out_file);
     /* All other cases are handled by the generic hook.  */
2916   return default_assemble_integer (x, size, aligned_p);
2919 /* Output an ascii string.  */
2921 output_ascii (FILE *file, const char *p, int size)
2925   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
2927   /* The HP assembler can only take strings of 256 characters at one
2928      time.  This is a limitation on input line length, *not* the
2929      length of the string.  Sigh.  Even worse, it seems that the
2930      restriction is in number of input characters (see \xnn &
2931      \whatever).  So we have to do this very carefully.  */
2933   fputs ("\t.STRING \"", file);
2936   for (i = 0; i < size; i += 4)
     /* Escape up to 4 input chars at a time into partial_output.  */
2940       for (io = 0, co = 0; io < MIN (4, size - i); io++)
2942 	  register unsigned int c = (unsigned char) p[i + io];
	  /* Quote " and \ with a backslash.  */
2944 	  if (c == '\"' || c == '\\')
2945 	    partial_output[co++] = '\\';
2946 	  if (c >= ' ' && c < 0177)
2947 	    partial_output[co++] = c;
	  /* Non-printable characters become \x hex escapes.  */
2951 	      partial_output[co++] = '\\';
2952 	      partial_output[co++] = 'x';
2953 	      hexd =  c  / 16 - 0 + '0';
2955 		hexd -= '9' - 'a' + 1;
2956 	      partial_output[co++] = hexd;
2957 	      hexd =  c % 16 - 0 + '0';
2959 		hexd -= '9' - 'a' + 1;
2960 	      partial_output[co++] = hexd;
     /* Start a new .STRING directive before the line gets too long.  */
2963       if (chars_output + co > 243)
2965 	  fputs ("\"\n\t.STRING \"", file);
2968       fwrite (partial_output, 1, (size_t) co, file);
2972   fputs ("\"\n", file);
2975 /* Try to rewrite floating point comparisons & branches to avoid
2976    useless add,tr insns.
2978    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2979    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
2980    first attempt to remove useless add,tr insns.  It is zero
2981    for the second pass as reorg sometimes leaves bogus REG_DEAD
2984    When CHECK_NOTES is zero we can only eliminate add,tr insns
2985    when there's a 1:1 correspondence between fcmp and ftest/fbranch
2988 remove_useless_addtr_insns (int check_notes)
     /* Count passes so the fcmp/fbranch scan is not repeated needlessly.  */
2991   static int pass = 0;
2993   /* This is fairly cheap, so always run it when optimizing.  */
2997       int fbranch_count = 0;
2999       /* Walk all the insns in this function looking for fcmp & fbranch
3000 	 instructions.  Keep track of how many of each we find.  */
3001       for (insn = get_insns (); insn; insn = next_insn (insn))
3005 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3006 	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3009 	  tmp = PATTERN (insn);
3011 	  /* It must be a set.  */
3012 	  if (GET_CODE (tmp) != SET)
3015 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3016 	  tmp = SET_DEST (tmp);
3017 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3023 	  tmp = PATTERN (insn);
3024 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3025 	  if (GET_CODE (tmp) == SET
3026 	      && SET_DEST (tmp) == pc_rtx
3027 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3028 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3029 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3030 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3038       /* Find all floating point compare + branch insns.  If possible,
3039 	 reverse the comparison & the branch to avoid add,tr insns.  */
3040       for (insn = get_insns (); insn; insn = next_insn (insn))
3044 	  /* Ignore anything that isn't an INSN.  */
3045 	  if (GET_CODE (insn) != INSN)
3048 	  tmp = PATTERN (insn);
3050 	  /* It must be a set.  */
3051 	  if (GET_CODE (tmp) != SET)
3054 	  /* The destination must be CCFP, which is register zero.  */
3055 	  tmp = SET_DEST (tmp);
3056 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3059 	  /* INSN should be a set of CCFP.
3061 	     See if the result of this insn is used in a reversed FP
3062 	     conditional branch.  If so, reverse our condition and
3063 	     the branch.  Doing so avoids useless add,tr insns.  */
3064 	  next = next_insn (insn);
3067 	      /* Jumps, calls and labels stop our search.  */
3068 	      if (GET_CODE (next) == JUMP_INSN
3069 		  || GET_CODE (next) == CALL_INSN
3070 		  || GET_CODE (next) == CODE_LABEL)
3073 	      /* As does another fcmp insn.  */
3074 	      if (GET_CODE (next) == INSN
3075 		  && GET_CODE (PATTERN (next)) == SET
3076 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3077 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3080 	      next = next_insn (next);
3083 	  /* Is NEXT_INSN a branch?  */
3085 	      && GET_CODE (next) == JUMP_INSN)
3087 	      rtx pattern = PATTERN (next);
3089 	      /* If it a reversed fp conditional branch (eg uses add,tr)
3090 		 and CCFP dies, then reverse our conditional and the branch
3091 		 to avoid the add,tr.  */
3092 	      if (GET_CODE (pattern) == SET
3093 		  && SET_DEST (pattern) == pc_rtx
3094 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3095 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3096 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3097 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3098 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3099 		  && (fcmp_count == fbranch_count
3101 			  && find_regno_note (next, REG_DEAD, 0))))
3103 		  /* Reverse the branch.  */
3104 		  tmp = XEXP (SET_SRC (pattern), 1);
3105 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3106 		  XEXP (SET_SRC (pattern), 2) = tmp;
		  /* Force re-recognition of the modified branch.  */
3107 		  INSN_CODE (next) = -1;
3109 		  /* Reverse our condition.  */
3110 		  tmp = PATTERN (insn);
3111 		  PUT_CODE (XEXP (tmp, 1),
3112 			    (reverse_condition_maybe_unordered
3113 			     (GET_CODE (XEXP (tmp, 1)))));
3123 /* You may have trouble believing this, but this is the 32 bit HP-PA
3128 Variable arguments (optional; any number may be allocated)
3130 SP-(4*(N+9)) arg word N
3135 Fixed arguments (must be allocated; may remain unused)
3144 SP-32 External Data Pointer (DP)
3146 SP-24 External/stub RP (RP')
3150 SP-8 Calling Stub RP (RP'')
3155 SP-0 Stack Pointer (points to next available address)
3159 /* This function saves registers as follows. Registers marked with ' are
3160 this function's registers (as opposed to the previous function's).
3161 If a frame_pointer isn't needed, r4 is saved as a general register;
3162 the space for the frame pointer is still allocated, though, to keep
3168 SP (FP') Previous FP
3169 SP + 4 Alignment filler (sigh)
3170 SP + 8 Space for locals reserved here.
3174 SP + n All call saved register used.
3178 SP + o All call saved fp registers used.
3182 SP + p (SP') points to next available address.
3186 /* Global variables set by output_function_prologue().  */
3187 /* Size of frame.  Need to know this to emit return insns from
3189 static int actual_fsize;
/* local_fsize: rounded size of locals plus frame offset; save_fregs:
   nonzero when floating point registers must be saved (set via
   compute_frame_size's FREGS_LIVE argument).  */
3190 static int local_fsize, save_fregs;
3192 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3193    Handle case where DISP > 8k by using the add_high_const patterns.
3195    Note in DISP > 8k case, we will leave the high part of the address
3196    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3199 store_reg (int reg, int disp, int base)
3201   rtx insn, dest, src, basereg;
3203   src = gen_rtx_REG (word_mode, reg);
3204   basereg = gen_rtx_REG (Pmode, base);
     /* A 14-bit displacement fits directly in the memory address.  */
3205   if (VAL_14_BITS_P (disp))
3207       dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3208       insn = emit_move_insn (dest, src);
     /* Large displacement: build the address as HIGH/LO_SUM via %r1.  */
3212       rtx delta = GEN_INT (disp);
3213       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3214       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3215       emit_move_insn (tmpreg, high);
3216       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3217       insn = emit_move_insn (dest, src);
     /* Attach an explicit frame-related expression so the unwinder sees
	the store relative to BASEREG rather than %r1.  */
3221 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3222 				 gen_rtx_SET (VOIDmode,
3223 					      gen_rtx_MEM (word_mode,
3224 							   gen_rtx_PLUS (word_mode, basereg,
3232     RTX_FRAME_RELATED_P (insn) = 1;
3235 /* Emit RTL to store REG at the memory location specified by BASE and then
3236    add MOD to BASE.  MOD must be <= 8k.  */
3239 store_reg_modify (int base, int reg, int mod)
3241   rtx insn, basereg, srcreg, delta;
     /* MOD must fit the 14-bit immediate field of the post-modify store.  */
3243   if (! VAL_14_BITS_P (mod))
3246   basereg = gen_rtx_REG (Pmode, base);
3247   srcreg = gen_rtx_REG (word_mode, reg);
3248   delta = GEN_INT (mod);
3250   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3253       RTX_FRAME_RELATED_P (insn) = 1;
3255       /* RTX_FRAME_RELATED_P must be set on each frame related set
3256 	 in a parallel with more than one element.  Don't set
3257 	 RTX_FRAME_RELATED_P in the first set if reg is temporary
3258 	 register 1.  The effect of this operation is recorded in
3259 	 the initial copy.  */
3262 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3263 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3267 	  /* The first element of a PARALLEL is always processed if it is
3268 	     a SET.  Thus, we need an expression list for this case.  */
3270 	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3271 				 gen_rtx_SET (VOIDmode, basereg,
3272 					      gen_rtx_PLUS (word_mode, basereg, delta)),
3278 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3279    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3280    whether to add a frame note or not.
3282    In the DISP > 8k case, we leave the high part of the address in %r1.
3283    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3286 set_reg_plus_d (int reg, int base, int disp, int note)
     /* Small displacement: a single add suffices.  */
3290   if (VAL_14_BITS_P (disp))
3292       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3293 			     plus_constant (gen_rtx_REG (Pmode, base), disp));
     /* Large displacement: HIGH part into %r1, then LO_SUM into REG.  */
3297       rtx basereg = gen_rtx_REG (Pmode, base);
3298       rtx delta = GEN_INT (disp);
3300       emit_move_insn (gen_rtx_REG (Pmode, 1),
3301 		      gen_rtx_PLUS (Pmode, basereg,
3302 				    gen_rtx_HIGH (Pmode, delta)));
3303       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3304 			     gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3308   if (DO_FRAME_NOTES && note)
3309     RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total frame size for the current function given SIZE
   bytes of locals.  If FREGS_LIVE is nonnull, set *FREGS_LIVE nonzero
   when any floating point register needs saving.  Returns the size
   rounded to the preferred stack boundary.  */
3313 compute_frame_size (int size, int *fregs_live)
3318   /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3319      be consistent with the rounding and size calculation done here.
3320      Change them at the same time.  */
3322   /* We do our own stack alignment.  First, round the size of the
3323      stack locals up to a word boundary.  */
3324   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3326   /* Space for previous frame pointer + filler.  If any frame is
3327      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3328      waste some space here for the sake of HP compatibility.  The
3329      first slot is only used when the frame pointer is needed.  */
3330   if (size || frame_pointer_needed)
3331     size += STARTING_FRAME_OFFSET;
3333   /* If the current function calls __builtin_eh_return, then we need
3334      to allocate stack space for registers that will hold data for
3335      the exception handler.  */
3336   if (DO_FRAME_NOTES && current_function_calls_eh_return)
3340       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3342       size += i * UNITS_PER_WORD;
3345   /* Account for space used by the callee general register saves.  */
3346   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3347     if (regs_ever_live[i])
3348       size += UNITS_PER_WORD;
3350   /* Account for space used by the callee floating point register saves.  */
3351   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3352     if (regs_ever_live[i]
3353 	|| (!TARGET_64BIT && regs_ever_live[i + 1]))
3357 	/* We always save both halves of the FP register, so always
3358 	   increment the frame size by 8 bytes.  */
3362   /* If any of the floating registers are saved, account for the
3363      alignment needed for the floating point register save block.  */
3366       size = (size + 7) & ~7;
3371   /* The various ABIs include space for the outgoing parameters in the
3372      size of the current function's stack frame.  We don't need to align
3373      for the outgoing arguments as their alignment is set by the final
3374      rounding for the frame as a whole.  */
3375   size += current_function_outgoing_args_size;
3377   /* Allocate space for the fixed frame marker.  This space must be
3378      allocated for any function that makes calls or allocates
3380   if (!current_function_is_leaf || size)
3381     size += TARGET_64BIT ? 48 : 32;
3383   /* Finally, round to the preferred stack boundary.  */
3384   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3385 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3388 /* Generate the assembly code for function entry.  FILE is a stdio
3389    stream to output the code to.  SIZE is an int: how many units of
3390    temporary storage to allocate.
3392    Refer to the array `regs_ever_live' to determine which registers to
3393    save; `regs_ever_live[I]' is nonzero if register number I is ever
3394    used in the function.  This function is responsible for knowing
3395    which registers should not be saved even if used.  */
3397 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3398    of memory.  If any fpu reg is used in the function, we allocate
3399    such a block here, at the bottom of the frame, just in case it's needed.
3401    If this function is a leaf procedure, then we may choose not
3402    to do a "save" insn.  The decision about whether or not
3403    to do this is made in regclass.c.  */
3406 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3408   /* The function's label and associated .PROC must never be
3409      separated and must be output *after* any profiling declarations
3410      to avoid changing spaces/subspaces within a procedure.  */
3411   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3412   fputs ("\t.PROC\n", file);
3414   /* hppa_expand_prologue does the dirty work now.  We just need
3415      to output the assembler directives which denote the start
3417   fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
3418   if (regs_ever_live[2])
3419     fputs (",CALLS,SAVE_RP", file);
3421     fputs (",NO_CALLS", file);
3423   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3424      at the beginning of the frame and that it is used as the frame
3425      pointer for the frame.  We do this because our current frame
3426      layout doesn't conform to that specified in the HP runtime
3427      documentation and we need a way to indicate to programs such as
3428      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3429      isn't used by HP compilers but is supported by the assembler.
3430      However, SAVE_SP is supposed to indicate that the previous stack
3431      pointer has been saved in the frame marker.  */
3432   if (frame_pointer_needed)
3433     fputs (",SAVE_SP", file);
3435   /* Pass on information about the number of callee register saves
3436      performed in the prologue.
3438      The compiler is supposed to pass the highest register number
3439      saved, the assembler then has to adjust that number before
3440      entering it into the unwind descriptor (to account for any
3441      caller saved registers with lower register numbers than the
3442      first callee saved register).  */
3444     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3447     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3449   fputs ("\n\t.ENTRY\n", file);
3451   remove_useless_addtr_insns (0);
3455 hppa_expand_prologue (void)
3457 int merge_sp_adjust_with_store = 0;
3458 int size = get_frame_size ();
3466 /* Compute total size for frame pointer, filler, locals and rounding to
3467 the next word boundary. Similar code appears in compute_frame_size
3468 and must be changed in tandem with this code. */
3469 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3470 if (local_fsize || frame_pointer_needed)
3471 local_fsize += STARTING_FRAME_OFFSET;
3473 actual_fsize = compute_frame_size (size, &save_fregs);
3475 /* Compute a few things we will use often. */
3476 tmpreg = gen_rtx_REG (word_mode, 1);
3478 /* Save RP first. The calling conventions manual states RP will
3479 always be stored into the caller's frame at sp - 20 or sp - 16
3480 depending on which ABI is in use. */
3481 if (regs_ever_live[2] || current_function_calls_eh_return)
3482 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3484 /* Allocate the local frame and set up the frame pointer if needed. */
3485 if (actual_fsize != 0)
3487 if (frame_pointer_needed)
3489 /* Copy the old frame pointer temporarily into %r1. Set up the
3490 new stack pointer, then store away the saved old frame pointer
3491 into the stack at sp and at the same time update the stack
3492 pointer by actual_fsize bytes. Two versions, first
3493 handles small (<8k) frames. The second handles large (>=8k)
3495 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3498 /* We need to record the frame pointer save here since the
3499 new frame pointer is set in the following insn. */
3500 RTX_FRAME_RELATED_P (insn) = 1;
3502 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3503 gen_rtx_SET (VOIDmode,
3504 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3509 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3511 RTX_FRAME_RELATED_P (insn) = 1;
3513 if (VAL_14_BITS_P (actual_fsize))
3514 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3517 /* It is incorrect to store the saved frame pointer at *sp,
3518 then increment sp (writes beyond the current stack boundary).
3520 So instead use stwm to store at *sp and post-increment the
3521 stack pointer as an atomic operation. Then increment sp to
3522 finish allocating the new frame. */
3523 int adjust1 = 8192 - 64;
3524 int adjust2 = actual_fsize - adjust1;
3526 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3527 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3531 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3532 we need to store the previous stack pointer (frame pointer)
3533 into the frame marker on targets that use the HP unwind
3534 library. This allows the HP unwind library to be used to
3535 unwind GCC frames. However, we are not fully compatible
3536 with the HP library because our frame layout differs from
3537 that specified in the HP runtime specification.
3539 We don't want a frame note on this instruction as the frame
3540 marker moves during dynamic stack allocation.
3542 This instruction also serves as a blockage to prevent
3543 register spills from being scheduled before the stack
3544 pointer is raised. This is necessary as we store
3545 registers using the frame pointer as a base register,
3546 and the frame pointer is set before sp is raised. */
3547 if (TARGET_HPUX_UNWIND_LIBRARY)
3549 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3550 GEN_INT (TARGET_64BIT ? -8 : -4));
3552 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3556 emit_insn (gen_blockage ());
3558 /* no frame pointer needed. */
3561 /* In some cases we can perform the first callee register save
3562 and allocating the stack frame at the same time. If so, just
3563 make a note of it and defer allocating the frame until saving
3564 the callee registers. */
3565 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3566 merge_sp_adjust_with_store = 1;
3567 /* Can not optimize. Adjust the stack frame by actual_fsize
3570 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3575 /* Normal register save.
3577 Do not save the frame pointer in the frame_pointer_needed case. It
3578 was done earlier. */
3579 if (frame_pointer_needed)
3581 offset = local_fsize;
3583 /* Saving the EH return data registers in the frame is the simplest
3584 way to get the frame unwind information emitted. We put them
3585 just before the general registers. */
3586 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3588 unsigned int i, regno;
3592 regno = EH_RETURN_DATA_REGNO (i);
3593 if (regno == INVALID_REGNUM)
3596 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3597 offset += UNITS_PER_WORD;
3601 for (i = 18; i >= 4; i--)
3602 if (regs_ever_live[i] && ! call_used_regs[i])
3604 store_reg (i, offset, FRAME_POINTER_REGNUM);
3605 offset += UNITS_PER_WORD;
3608 /* Account for %r3 which is saved in a special place. */
3611 /* No frame pointer needed. */
3614 offset = local_fsize - actual_fsize;
3616 /* Saving the EH return data registers in the frame is the simplest
3617 way to get the frame unwind information emitted. */
3618 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3620 unsigned int i, regno;
3624 regno = EH_RETURN_DATA_REGNO (i);
3625 if (regno == INVALID_REGNUM)
3628 /* If merge_sp_adjust_with_store is nonzero, then we can
3629 optimize the first save. */
3630 if (merge_sp_adjust_with_store)
3632 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3633 merge_sp_adjust_with_store = 0;
3636 store_reg (regno, offset, STACK_POINTER_REGNUM);
3637 offset += UNITS_PER_WORD;
3641 for (i = 18; i >= 3; i--)
3642 if (regs_ever_live[i] && ! call_used_regs[i])
3644 /* If merge_sp_adjust_with_store is nonzero, then we can
3645 optimize the first GR save. */
3646 if (merge_sp_adjust_with_store)
3648 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3649 merge_sp_adjust_with_store = 0;
3652 store_reg (i, offset, STACK_POINTER_REGNUM);
3653 offset += UNITS_PER_WORD;
3657 /* If we wanted to merge the SP adjustment with a GR save, but we never
3658 did any GR saves, then just emit the adjustment here. */
3659 if (merge_sp_adjust_with_store)
3660 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3664 /* The hppa calling conventions say that %r19, the pic offset
3665 register, is saved at sp - 32 (in this function's frame)
3666 when generating PIC code. FIXME: What is the correct thing
3667 to do for functions which make no calls and allocate no
3668 frame? Do we need to allocate a frame, or can we just omit
3669 the save? For now we'll just omit the save.
3671 We don't want a note on this insn as the frame marker can
3672 move if there is a dynamic stack allocation. */
3673 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3675 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3677 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3681 /* Align pointer properly (doubleword boundary). */
3682 offset = (offset + 7) & ~7;
3684 /* Floating point register store. */
3689 /* First get the frame or stack pointer to the start of the FP register
3691 if (frame_pointer_needed)
3693 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3694 base = frame_pointer_rtx;
3698 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3699 base = stack_pointer_rtx;
3702 /* Now actually save the FP registers. */
3703 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3705 if (regs_ever_live[i]
3706 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3708 rtx addr, insn, reg;
3709 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3710 reg = gen_rtx_REG (DFmode, i);
3711 insn = emit_move_insn (addr, reg);
3714 RTX_FRAME_RELATED_P (insn) = 1;
3717 rtx mem = gen_rtx_MEM (DFmode,
3718 plus_constant (base, offset));
3720 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3721 gen_rtx_SET (VOIDmode, mem, reg),
3726 rtx meml = gen_rtx_MEM (SFmode,
3727 plus_constant (base, offset));
3728 rtx memr = gen_rtx_MEM (SFmode,
3729 plus_constant (base, offset + 4));
3730 rtx regl = gen_rtx_REG (SFmode, i);
3731 rtx regr = gen_rtx_REG (SFmode, i + 1);
3732 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3733 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3736 RTX_FRAME_RELATED_P (setl) = 1;
3737 RTX_FRAME_RELATED_P (setr) = 1;
3738 vec = gen_rtvec (2, setl, setr);
3740 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3741 gen_rtx_SEQUENCE (VOIDmode, vec),
3745 offset += GET_MODE_SIZE (DFmode);
3752 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3753 Handle case where DISP > 8k by using the add_high_const patterns. */
3756 load_reg (int reg, int disp, int base)
3758 rtx src, dest, basereg;
3760 dest = gen_rtx_REG (word_mode, reg);
3761 basereg = gen_rtx_REG (Pmode, base);
/* A displacement that fits in 14 bits can be used directly in a single
   load instruction.  */
3762 if (VAL_14_BITS_P (disp))
3764 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3765 emit_move_insn (dest, src);
/* Otherwise, build the address in scratch register %r1 using a
   HIGH/LO_SUM pair (the add_high_const patterns mentioned above).  */
3769 rtx delta = GEN_INT (disp);
3770 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3771 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3772 emit_move_insn (tmpreg, high);
3773 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3774 emit_move_insn (dest, src);
3778 /* Update the total code bytes output to the text section. */
3781 update_total_code_bytes (int nbytes)
/* Only account for code placed in the default text section; code in a
   named section is not counted against the text-section total.  */
3783 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3784 && !IN_NAMED_SECTION_P (cfun->decl))
3786 if (INSN_ADDRESSES_SET_P ())
3788 unsigned long old_total = total_code_bytes;
3790 total_code_bytes += nbytes;
3792 /* Be prepared to handle overflows. */
/* -1 acts as an "unknown/overflowed" sentinel for total_code_bytes.  */
3793 if (old_total > total_code_bytes)
3794 total_code_bytes = -1;
3797 total_code_bytes = -1;
3801 /* This function generates the assembly code for function exit.
3802 Args are as for output_function_prologue ().
3804 The function epilogue should not depend on the current stack
3805 pointer! It should use the frame pointer only. This is mandatory
3806 because of alloca; we also take advantage of it to omit stack
3807 adjustments before returning. */
3810 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3812 rtx insn = get_last_insn ();
3816 /* hppa_expand_epilogue does the dirty work now. We just need
3817 to output the assembler directives which denote the end
3820 To make debuggers happy, emit a nop if the epilogue was completely
3821 eliminated due to a volatile call as the last insn in the
3822 current function. That way the return address (in %r2) will
3823 always point to a valid instruction in the current function. */
3825 /* Get the last real insn. */
3826 if (GET_CODE (insn) == NOTE)
3827 insn = prev_real_insn (insn);
3829 /* If it is a sequence, then look inside. */
3830 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3831 insn = XVECEXP (PATTERN (insn), 0, 0);
3833 /* If insn is a CALL_INSN, then it must be a call to a volatile
3834 function (otherwise there would be epilogue insns). */
3835 if (insn && GET_CODE (insn) == CALL_INSN)
3837 fputs ("\tnop\n", file);
3841 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3843 if (INSN_ADDRESSES_SET_P ())
3845 insn = get_last_nonnote_insn ();
3846 last_address += INSN_ADDRESSES (INSN_UID (insn));
3848 last_address += insn_default_length (insn);
/* Round the function size up to the function alignment boundary so
   the running total reflects actual text-section layout.  */
3849 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3850 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3853 /* Finally, update the total number of code bytes output so far. */
3854 update_total_code_bytes (last_address);
/* Emit RTL for the function epilogue: restore %r2 (RP), the callee-saved
   general and floating point registers, and deallocate the stack frame.
   Mirrors the layout created by hppa_expand_prologue.  */
3858 hppa_expand_epilogue (void)
3862 int merge_sp_adjust_with_load = 0;
3865 /* We will use this often. */
3866 tmpreg = gen_rtx_REG (word_mode, 1);
3868 /* Try to restore RP early to avoid load/use interlocks when
3869 RP gets used in the return (bv) instruction. This appears to still
3870 be necessary even when we schedule the prologue and epilogue. */
3871 if (regs_ever_live [2] || current_function_calls_eh_return)
/* RP is saved at frame offset -16 (64-bit) or -20 (32-bit).  */
3873 ret_off = TARGET_64BIT ? -16 : -20;
3874 if (frame_pointer_needed)
3876 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3881 /* No frame pointer, and stack is smaller than 8k. */
3882 if (VAL_14_BITS_P (ret_off - actual_fsize))
3884 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3890 /* General register restores. */
3891 if (frame_pointer_needed)
3893 offset = local_fsize;
3895 /* If the current function calls __builtin_eh_return, then we need
3896 to restore the saved EH data registers. */
3897 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3899 unsigned int i, regno;
3903 regno = EH_RETURN_DATA_REGNO (i);
3904 if (regno == INVALID_REGNUM)
3907 load_reg (regno, offset, FRAME_POINTER_REGNUM)

3908 offset += UNITS_PER_WORD;
/* Callee-saved GRs %r4..%r18 were stored off the frame pointer.  */
3912 for (i = 18; i >= 4; i--)
3913 if (regs_ever_live[i] && ! call_used_regs[i])
3915 load_reg (i, offset, FRAME_POINTER_REGNUM);
3916 offset += UNITS_PER_WORD;
3921 offset = local_fsize - actual_fsize;
3923 /* If the current function calls __builtin_eh_return, then we need
3924 to restore the saved EH data registers. */
3925 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3927 unsigned int i, regno;
3931 regno = EH_RETURN_DATA_REGNO (i);
3932 if (regno == INVALID_REGNUM)
3935 /* Only for the first load.
3936 merge_sp_adjust_with_load holds the register load
3937 with which we will merge the sp adjustment. */
3938 if (merge_sp_adjust_with_load == 0
3940 && VAL_14_BITS_P (-actual_fsize))
3941 merge_sp_adjust_with_load = regno;
3943 load_reg (regno, offset, STACK_POINTER_REGNUM);
3944 offset += UNITS_PER_WORD;
/* No frame pointer: callee-saved GRs %r3..%r18 live off %sp.  */
3948 for (i = 18; i >= 3; i--)
3950 if (regs_ever_live[i] && ! call_used_regs[i])
3952 /* Only for the first load.
3953 merge_sp_adjust_with_load holds the register load
3954 with which we will merge the sp adjustment. */
3955 if (merge_sp_adjust_with_load == 0
3957 && VAL_14_BITS_P (-actual_fsize))
3958 merge_sp_adjust_with_load = i;
3960 load_reg (i, offset, STACK_POINTER_REGNUM);
3961 offset += UNITS_PER_WORD;
3966 /* Align pointer properly (doubleword boundary). */
3967 offset = (offset + 7) & ~7;
3969 /* FP register restores. */
3972 /* Adjust the register to index off of. */
3973 if (frame_pointer_needed)
3974 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3976 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3978 /* Actually do the restores now. */
3979 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3980 if (regs_ever_live[i]
3981 || (! TARGET_64BIT && regs_ever_live[i + 1]))
/* %r1 (tmpreg) walks through the FP save area via post-increment.  */
3983 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3984 rtx dest = gen_rtx_REG (DFmode, i);
3985 emit_move_insn (dest, src);
3989 /* Emit a blockage insn here to keep these insns from being moved to
3990 an earlier spot in the epilogue, or into the main instruction stream.
3992 This is necessary as we must not cut the stack back before all the
3993 restores are finished. */
3994 emit_insn (gen_blockage ());
3996 /* Reset stack pointer (and possibly frame pointer). The stack
3997 pointer is initially set to fp + 64 to avoid a race condition. */
3998 if (frame_pointer_needed)
4000 rtx delta = GEN_INT (-64);
4002 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
/* gen_pre_load restores the old frame pointer and pops the final
   64 bytes in a single pre-decrement load.  */
4003 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4005 /* If we were deferring a callee register restore, do it now. */
4006 else if (merge_sp_adjust_with_load)
4008 rtx delta = GEN_INT (-actual_fsize);
4009 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4011 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4013 else if (actual_fsize != 0)
4014 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4017 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4018 frame greater than 8k), do so now. */
4020 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4022 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4024 rtx sa = EH_RETURN_STACKADJ_RTX;
/* Apply the EH stack adjustment after all restores are complete.  */
4026 emit_insn (gen_blockage ());
4027 emit_insn (TARGET_64BIT
4028 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4029 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return an rtx holding the entry value of the PIC offset table
   register, using the hard-reg-initial-value machinery.  */
4034 hppa_pic_save_rtx (void)
4036 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
/* Emit RTL for the _mcount profiling call.  LABEL_NO identifies the
   per-function profile counter label (LP<n>).  Sets up the standard
   _mcount arguments in %r24-%r26 before the call.  */
4040 hppa_profile_hook (int label_no)
4042 /* We use SImode for the address of the function in both 32 and
4043 64-bit code to avoid having to provide DImode versions of the
4044 lcla2 and load_offset_label_address insn patterns. */
4045 rtx reg = gen_reg_rtx (SImode);
4046 rtx label_rtx = gen_label_rtx ();
4047 rtx begin_label_rtx, call_insn;
4048 char begin_label_name[16];
4050 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4052 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4055 emit_move_insn (arg_pointer_rtx,
4056 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
/* %r26 receives the caller's return pointer (%r2) as the first
   _mcount argument.  */
4059 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4061 /* The address of the function is loaded into %r25 with an instruction-
4062 relative sequence that avoids the use of relocations. The sequence
4063 is split so that the load_offset_label_address instruction can
4064 occupy the delay slot of the call to _mcount. */
4066 emit_insn (gen_lcla2 (reg, label_rtx));
4068 emit_insn (gen_lcla1 (reg, label_rtx));
4070 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4071 reg, begin_label_rtx, label_rtx));
4073 #ifndef NO_PROFILE_COUNTERS
4075 rtx count_label_rtx, addr, r24;
4076 char count_label_name[16];
/* %r24 points at this function's profile counter (label LP<label_no>).  */
4078 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4079 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4081 addr = force_reg (Pmode, count_label_rtx);
4082 r24 = gen_rtx_REG (Pmode, 24);
4083 emit_move_insn (r24, addr);
4086 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4087 gen_rtx_SYMBOL_REF (Pmode,
4089 GEN_INT (TARGET_64BIT ? 24 : 12)));
4091 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4096 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4097 gen_rtx_SYMBOL_REF (Pmode,
4099 GEN_INT (TARGET_64BIT ? 16 : 8)));
/* Record %r25/%r26 as used by the call so they are not dead-coded.  */
4103 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4104 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4106 /* Indicate the _mcount call cannot throw, nor will it execute a
4108 REG_NOTES (call_insn)
4109 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4112 /* Fetch the return address for the frame COUNT steps up from
4113 the current frame, after the prologue. FRAMEADDR is the
4114 frame pointer of the COUNT frame.
4116 We want to ignore any export stub remnants here. To handle this,
4117 we examine the code at the return address, and if it is an export
4118 stub, we return a memory rtx for the stub return address stored
4121 The value returned is used in two different ways:
4123 1. To find a function's caller.
4125 2. To change the return address for a function.
4127 This function handles most instances of case 1; however, it will
4128 fail if there are two levels of stubs to execute on the return
4129 path. The only way I believe that can happen is if the return value
4130 needs a parameter relocation, which never happens for C code.
4132 This function handles most instances of case 2; however, it will
4133 fail if we did not originally have stub code on the return path
4134 but will need stub code on the new return path. This can happen if
4135 the caller & callee are both in the main program, but the new
4136 return location is in a shared library. */
4139 return_addr_rtx (int count, rtx frameaddr)
4149 rp = get_hard_reg_initial_val (Pmode, 2);
/* 64-bit and no-space-register targets never use export stubs, so the
   raw return pointer can be used directly.  */
4151 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4154 saved_rp = gen_reg_rtx (Pmode);
4155 emit_move_insn (saved_rp, rp);
4157 /* Get pointer to the instruction stream. We have to mask out the
4158 privilege level from the two low order bits of the return address
4159 pointer here so that ins will point to the start of the first
4160 instruction that would have been executed if we returned. */
4161 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4162 label = gen_label_rtx ();
4164 /* Check the instruction stream at the normal return address for the
4167 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4168 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4169 0x00011820 | stub+16: mtsp r1,sr0
4170 0xe0400002 | stub+20: be,n 0(sr0,rp)
4172 If it is an export stub, then our return address is really in
/* Compare each of the four stub words in turn; any mismatch branches
   to LABEL and falls back to the saved return pointer.  */
4175 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4176 NULL_RTX, SImode, 1);
4177 emit_jump_insn (gen_bne (label));
4179 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4180 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4181 emit_jump_insn (gen_bne (label));
4183 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4184 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4185 emit_jump_insn (gen_bne (label));
4187 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4188 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4190 /* If there is no export stub then just use the value saved from
4191 the return pointer register. */
4193 emit_jump_insn (gen_bne (label));
4195 /* Here we know that our return address points to an export
4196 stub. We don't want to return the address of the export stub,
4197 but rather the return address of the export stub. That return
4198 address is stored at -24[frameaddr]. */
4200 emit_move_insn (saved_rp,
4202 memory_address (Pmode,
4203 plus_constant (frameaddr,
4210 /* This is only valid once reload has completed because it depends on
4211 knowing exactly how much (if any) frame there is and...
4213 It's only valid if there is no frame marker to de-allocate and...
4215 It's only valid if %r2 hasn't been saved into the caller's frame
4216 (we're not profiling and %r2 isn't live anywhere). */
4218 hppa_can_use_return_insn_p (void)
/* A bare return insn is usable only when no frame was allocated, %r2
   was never live (so RP is still intact), and no frame pointer exists.  */
4220 return (reload_completed
4221 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4222 && ! regs_ever_live[2]
4223 && ! frame_pointer_needed);
/* Emit a conditional branch on the floating point status register
   (CCFP, register 0) to label OPERAND0, using comparison CODE.  */
4227 emit_bcond_fp (enum rtx_code code, rtx operand0)
4229 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4230 gen_rtx_IF_THEN_ELSE (VOIDmode,
4231 gen_rtx_fmt_ee (code,
4233 gen_rtx_REG (CCFPmode, 0),
4235 gen_rtx_LABEL_REF (VOIDmode, operand0),
/* Build (but do not emit) a SET of the floating point status register
   to the comparison CODE of OPERAND0 with OPERAND1.  */
4241 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4243 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4244 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4247 /* Adjust the cost of a scheduling dependency. Return the new cost of
4248 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4251 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4253 enum attr_type attr_type;
4255 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4256 true dependencies as they are described with bypasses now. */
4257 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4260 if (! recog_memoized (insn))
4263 attr_type = get_attr_type (insn);
4265 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4267 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4270 if (attr_type == TYPE_FPLOAD)
4272 rtx pat = PATTERN (insn);
4273 rtx dep_pat = PATTERN (dep_insn);
4274 if (GET_CODE (pat) == PARALLEL)
4276 /* This happens for the fldXs,mb patterns. */
4277 pat = XVECEXP (pat, 0, 0);
4279 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4280 /* If this happens, we have to extend this to schedule
4281 optimally. Return 0 for now. */
4284 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4286 if (! recog_memoized (dep_insn))
4288 switch (get_attr_type (dep_insn))
4295 case TYPE_FPSQRTSGL:
4296 case TYPE_FPSQRTDBL:
4297 /* A fpload can't be issued until one cycle before a
4298 preceding arithmetic operation has finished if
4299 the target of the fpload is any of the sources
4300 (or destination) of the arithmetic operation. */
4301 return insn_default_latency (dep_insn) - 1;
4308 else if (attr_type == TYPE_FPALU)
4310 rtx pat = PATTERN (insn);
4311 rtx dep_pat = PATTERN (dep_insn);
4312 if (GET_CODE (pat) == PARALLEL)
4314 /* This happens for the fldXs,mb patterns. */
4315 pat = XVECEXP (pat, 0, 0);
4317 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4318 /* If this happens, we have to extend this to schedule
4319 optimally. Return 0 for now. */
4322 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4324 if (! recog_memoized (dep_insn))
4326 switch (get_attr_type (dep_insn))
4330 case TYPE_FPSQRTSGL:
4331 case TYPE_FPSQRTDBL:
4332 /* An ALU flop can't be issued until two cycles before a
4333 preceding divide or sqrt operation has finished if
4334 the target of the ALU flop is any of the sources
4335 (or destination) of the divide or sqrt operation. */
4336 return insn_default_latency (dep_insn) - 2;
4344 /* For other anti dependencies, the cost is 0. */
4347 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4349 /* Output dependency; DEP_INSN writes a register that INSN writes some
4351 if (attr_type == TYPE_FPLOAD)
4353 rtx pat = PATTERN (insn);
4354 rtx dep_pat = PATTERN (dep_insn);
4355 if (GET_CODE (pat) == PARALLEL)
4357 /* This happens for the fldXs,mb patterns. */
4358 pat = XVECEXP (pat, 0, 0);
4360 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4361 /* If this happens, we have to extend this to schedule
4362 optimally. Return 0 for now. */
4365 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4367 if (! recog_memoized (dep_insn))
4369 switch (get_attr_type (dep_insn))
4376 case TYPE_FPSQRTSGL:
4377 case TYPE_FPSQRTDBL:
4378 /* A fpload can't be issued until one cycle before a
4379 preceding arithmetic operation has finished if
4380 the target of the fpload is the destination of the
4381 arithmetic operation.
4383 Exception: For PA7100LC, PA7200 and PA7300, the cost
4384 is 3 cycles, unless they bundle together. We also
4385 pay the penalty if the second insn is a fpload. */
4386 return insn_default_latency (dep_insn) - 1;
4393 else if (attr_type == TYPE_FPALU)
4395 rtx pat = PATTERN (insn);
4396 rtx dep_pat = PATTERN (dep_insn);
4397 if (GET_CODE (pat) == PARALLEL)
4399 /* This happens for the fldXs,mb patterns. */
4400 pat = XVECEXP (pat, 0, 0);
4402 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4403 /* If this happens, we have to extend this to schedule
4404 optimally. Return 0 for now. */
4407 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4409 if (! recog_memoized (dep_insn))
4411 switch (get_attr_type (dep_insn))
4415 case TYPE_FPSQRTSGL:
4416 case TYPE_FPSQRTDBL:
4417 /* An ALU flop can't be issued until two cycles before a
4418 preceding divide or sqrt operation has finished if
4419 the target of the ALU flop is also the target of
4420 the divide or sqrt operation. */
4421 return insn_default_latency (dep_insn) - 2;
4429 /* For other output dependencies, the cost is 0. */
4436 /* Adjust scheduling priorities. We use this to try and keep addil
4437 and the next use of %r1 close together. */
4439 pa_adjust_priority (rtx insn, int priority)
4441 rtx set = single_set (insn);
4445 src = SET_SRC (set);
4446 dest = SET_DEST (set);
/* Each arm matches an insn that consumes the %r1 result of a preceding
   addil: a LO_SUM address calculation, or a load/store through one.
   Read-only (literal) symbols are excluded.  */
4447 if (GET_CODE (src) == LO_SUM
4448 && symbolic_operand (XEXP (src, 1), VOIDmode)
4449 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4452 else if (GET_CODE (src) == MEM
4453 && GET_CODE (XEXP (src, 0)) == LO_SUM
4454 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4455 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4458 else if (GET_CODE (dest) == MEM
4459 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4460 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4461 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4467 /* The 700 can only issue a single insn at a time.
4468 The 7XXX processors can issue two insns at a time.
4469 The 8000 can issue 4 insns at a time. */
4471 pa_issue_rate (void)
4475 case PROCESSOR_700: return 1;
4476 case PROCESSOR_7100: return 2;
4477 case PROCESSOR_7100LC: return 2;
4478 case PROCESSOR_7200: return 2;
4479 case PROCESSOR_7300: return 2;
4480 case PROCESSOR_8000: return 4;
4489 /* Return any length adjustment needed by INSN which already has its length
4490 computed as LENGTH. Return zero if no adjustment is necessary.
4492 For the PA: function calls, millicode calls, and backwards short
4493 conditional branches with unfilled delay slots need an adjustment by +1
4494 (to account for the NOP which will be inserted into the instruction stream).
4496 Also compute the length of an inline block move here as it is too
4497 complicated to express as a length attribute in pa.md. */
4499 pa_adjust_insn_length (rtx insn, int length)
4501 rtx pat = PATTERN (insn);
4503 /* Jumps inside switch tables which have unfilled delay slots need
4505 if (GET_CODE (insn) == JUMP_INSN
4506 && GET_CODE (pat) == PARALLEL
4507 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4509 /* Millicode insn with an unfilled delay slot. */
4510 else if (GET_CODE (insn) == INSN
4511 && GET_CODE (pat) != SEQUENCE
4512 && GET_CODE (pat) != USE
4513 && GET_CODE (pat) != CLOBBER
4514 && get_attr_type (insn) == TYPE_MILLI)
4516 /* Block move pattern. */
4517 else if (GET_CODE (insn) == INSN
4518 && GET_CODE (pat) == PARALLEL
4519 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4520 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4521 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4522 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4523 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4524 return compute_movstr_length (insn) - 4;
4525 /* Block clear pattern. */
4526 else if (GET_CODE (insn) == INSN
4527 && GET_CODE (pat) == PARALLEL
4528 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4529 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4530 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4531 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4532 return compute_clrstr_length (insn) - 4;
4533 /* Conditional branch with an unfilled delay slot. */
4534 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4536 /* Adjust a short backwards conditional with an unfilled delay slot. */
4537 if (GET_CODE (pat) == SET
4539 && ! forward_branch_p (insn))
4541 else if (GET_CODE (pat) == PARALLEL
4542 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4545 /* Adjust dbra insn with short backwards conditional branch with
4546 unfilled delay slot -- only for case where counter is in a
4547 general register. */
4548 else if (GET_CODE (pat) == PARALLEL
4549 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4550 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4551 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4553 && ! forward_branch_p (insn))
4561 /* Print operand X (an rtx) in assembler syntax to file FILE.
4562 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4563 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4566 print_operand (FILE *file, rtx x, int code)
4571 /* Output a 'nop' if there's nothing for the delay slot. */
4572 if (dbr_sequence_length () == 0)
4573 fputs ("\n\tnop", file);
4576 /* Output a nullification completer if there's nothing for the */
4577 /* delay slot or nullification is requested. */
4578 if (dbr_sequence_length () == 0 ||
4580 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4584 /* Print out the second register name of a register pair.
4585 I.e., R (6) => 7. */
4586 fputs (reg_names[REGNO (x) + 1], file);
4589 /* A register or zero. */
4591 || (x == CONST0_RTX (DFmode))
4592 || (x == CONST0_RTX (SFmode)))
4594 fputs ("%r0", file);
4600 /* A register or zero (floating point). */
4602 || (x == CONST0_RTX (DFmode))
4603 || (x == CONST0_RTX (SFmode)))
4605 fputs ("%fr0", file);
/* Emit a symbolic address of the form "symbol(basereg)".  */
4614 xoperands[0] = XEXP (XEXP (x, 0), 0);
4615 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4616 output_global_address (file, xoperands[1], 0);
4617 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4621 case 'C': /* Plain (C)ondition */
4623 switch (GET_CODE (x))
4626 fputs ("=", file); break;
4628 fputs ("<>", file); break;
4630 fputs (">", file); break;
4632 fputs (">=", file); break;
4634 fputs (">>=", file); break;
4636 fputs (">>", file); break;
4638 fputs ("<", file); break;
4640 fputs ("<=", file); break;
4642 fputs ("<<=", file); break;
4644 fputs ("<<", file); break;
4649 case 'N': /* Condition, (N)egated */
4650 switch (GET_CODE (x))
4653 fputs ("<>", file); break;
4655 fputs ("=", file); break;
4657 fputs ("<=", file); break;
4659 fputs ("<", file); break;
4661 fputs ("<<", file); break;
4663 fputs ("<<=", file); break;
4665 fputs (">=", file); break;
4667 fputs (">", file); break;
4669 fputs (">>", file); break;
4671 fputs (">>=", file); break;
4676 /* For floating point comparisons. Note that the output
4677 predicates are the complement of the desired mode. */
4679 switch (GET_CODE (x))
4682 fputs ("!=", file); break;
4684 fputs ("=", file); break;
4686 fputs ("!>", file); break;
4688 fputs ("!>=", file); break;
4690 fputs ("!<", file); break;
4692 fputs ("!<=", file); break;
4694 fputs ("!<>", file); break;
4696 fputs (">", file); break;
4698 fputs (">=", file); break;
4700 fputs ("<", file); break;
4702 fputs ("<=", file); break;
4704 fputs ("<>", file); break;
4706 fputs ("<=>", file); break;
4708 fputs ("!<=>", file); break;
4713 case 'S': /* Condition, operands are (S)wapped. */
4714 switch (GET_CODE (x))
4717 fputs ("=", file); break;
4719 fputs ("<>", file); break;
4721 fputs ("<", file); break;
4723 fputs ("<=", file); break;
4725 fputs ("<<=", file); break;
4727 fputs ("<<", file); break;
4729 fputs (">", file); break;
4731 fputs (">=", file); break;
4733 fputs (">>=", file); break;
4735 fputs (">>", file); break;
4740 case 'B': /* Condition, (B)oth swapped and negate. */
4741 switch (GET_CODE (x))
4744 fputs ("<>", file); break;
4746 fputs ("=", file); break;
4748 fputs (">=", file); break;
4750 fputs (">", file); break;
4752 fputs (">>", file); break;
4754 fputs (">>=", file); break;
4756 fputs ("<=", file); break;
4758 fputs ("<", file); break;
4760 fputs ("<<", file); break;
4762 fputs ("<<=", file); break;
/* Bitwise complement of a CONST_INT.  */
4768 if (GET_CODE (x) == CONST_INT)
4770 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
/* 64 minus a 6-bit shift count (for 64-bit shift complements).  */
4775 if (GET_CODE (x) == CONST_INT)
4777 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
/* 32 minus a 5-bit shift count (for 32-bit shift complements).  */
4782 if (GET_CODE (x) == CONST_INT)
4784 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
/* log2 of an exact power-of-two constant.  */
4789 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4791 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4796 if (GET_CODE (x) == CONST_INT)
4798 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4803 if (GET_CODE (x) == CONST_INT)
4805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4810 if (GET_CODE (x) == CONST_INT)
/* Memory-operand completers: choose ,mb / ,ma / x,s suffixes based on
   the address form (pre/post modify, scaled index).  ASSEMBLER_DIALECT
   0 selects the older syntax with the extra "s".  */
4815 switch (GET_CODE (XEXP (x, 0)))
4819 if (ASSEMBLER_DIALECT == 0)
4820 fputs ("s,mb", file);
4822 fputs (",mb", file);
4826 if (ASSEMBLER_DIALECT == 0)
4827 fputs ("s,ma", file);
4829 fputs (",ma", file);
4832 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4833 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4835 if (ASSEMBLER_DIALECT == 0)
4836 fputs ("x,s", file);
4840 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4844 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4850 output_global_address (file, x, 0);
4853 output_global_address (file, x, 1);
4855 case 0: /* Don't do anything special */
/* Emit "p,len,pos" operand triplets for zdep insns.  */
4860 compute_zdepwi_operands (INTVAL (x), op);
4861 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4867 compute_zdepdi_operands (INTVAL (x), op);
4868 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4872 /* We can get here from a .vtable_inherit due to our
4873 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Default operand output: register, memory, or constant.  */
4879 if (GET_CODE (x) == REG)
4881 fputs (reg_names [REGNO (x)], file);
4882 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4888 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4889 && (REGNO (x) & 1) == 0)
4892 else if (GET_CODE (x) == MEM)
4894 int size = GET_MODE_SIZE (GET_MODE (x));
4895 rtx base = NULL_RTX;
4896 switch (GET_CODE (XEXP (x, 0)))
4900 base = XEXP (XEXP (x, 0), 0);
4901 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4905 base = XEXP (XEXP (x, 0), 0);
4906 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
/* Scaled-index addresses print as "indexreg(basereg)".  */
4909 if (GET_CODE (XEXP (x, 0)) == PLUS
4910 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4911 fprintf (file, "%s(%s)",
4912 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4913 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4914 else if (GET_CODE (XEXP (x, 0)) == PLUS
4915 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4916 fprintf (file, "%s(%s)",
4917 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4918 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4920 output_address (XEXP (x, 0));
4925 output_addr_const (file, x);
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
4931 output_global_address (FILE *file, rtx x, int round_constant)
4934 /* Imagine (high (const (plus ...))). */
4935 if (GET_CODE (x) == HIGH)
4938 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4939 assemble_name (file, XSTR (x, 0));
4940 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4942 assemble_name (file, XSTR (x, 0));
4943 fputs ("-$global$", file);
4945 else if (GET_CODE (x) == CONST)
4947 const char *sep = "";
4948 int offset = 0; /* assembler wants -$global$ at end */
4949 rtx base = NULL_RTX;
4951 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4953 base = XEXP (XEXP (x, 0), 0);
4954 output_addr_const (file, base);
4956 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4957 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4960 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4962 base = XEXP (XEXP (x, 0), 1);
4963 output_addr_const (file, base);
4965 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4966 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4969 /* How bogus. The compiler is apparently responsible for
4970 rounding the constant if it uses an LR field selector.
4972 The linker and/or assembler seem a better place since
4973 they have to do this kind of thing already.
4975 If we fail to do this, HP's optimizing linker may eliminate
4976 an addil, but not update the ldw/stw/ldo instruction that
4977 uses the result of the addil. */
4979 offset = ((offset + 0x1000) & ~0x1fff);
4981 if (GET_CODE (XEXP (x, 0)) == PLUS)
4991 else if (GET_CODE (XEXP (x, 0)) == MINUS
4992 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4996 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4997 fputs ("-$global$", file);
4999 fprintf (file, "%s%d", sep, offset);
5002 output_addr_const (file, x);
5005 /* Output boilerplate text to appear at the beginning of the file.
5006 There are several possible versions. */
5007 #define aputs(x) fputs(x, asm_out_file)
5009 pa_file_start_level (void)
5012 aputs ("\t.LEVEL 2.0w\n");
5013 else if (TARGET_PA_20)
5014 aputs ("\t.LEVEL 2.0\n");
5015 else if (TARGET_PA_11)
5016 aputs ("\t.LEVEL 1.1\n");
5018 aputs ("\t.LEVEL 1.0\n");
5022 pa_file_start_space (int sortspace)
5024 aputs ("\t.SPACE $PRIVATE$");
5027 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5028 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5029 "\n\t.SPACE $TEXT$");
5032 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5033 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5037 pa_file_start_file (int want_version)
5039 if (write_symbols != NO_DEBUG)
5041 output_file_directive (asm_out_file, main_input_filename);
5043 aputs ("\t.version\t\"01.01\"\n");
5048 pa_file_start_mcount (const char *aswhat)
5051 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5055 pa_elf_file_start (void)
5057 pa_file_start_level ();
5058 pa_file_start_mcount ("ENTRY");
5059 pa_file_start_file (0);
5063 pa_som_file_start (void)
5065 pa_file_start_level ();
5066 pa_file_start_space (0);
5067 aputs ("\t.IMPORT $global$,DATA\n"
5068 "\t.IMPORT $$dyncall,MILLICODE\n");
5069 pa_file_start_mcount ("CODE");
5070 pa_file_start_file (0);
5074 pa_linux_file_start (void)
5076 pa_file_start_file (1);
5077 pa_file_start_level ();
5078 pa_file_start_mcount ("CODE");
5082 pa_hpux64_gas_file_start (void)
5084 pa_file_start_level ();
5085 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5087 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5089 pa_file_start_file (1);
5093 pa_hpux64_hpas_file_start (void)
5095 pa_file_start_level ();
5096 pa_file_start_space (1);
5097 pa_file_start_mcount ("CODE");
5098 pa_file_start_file (0);
5102 static struct deferred_plabel *
5103 get_plabel (const char *fname)
5107 /* See if we have already put this function on the list of deferred
5108 plabels. This list is generally small, so a liner search is not
5109 too ugly. If it proves too slow replace it with something faster. */
5110 for (i = 0; i < n_deferred_plabels; i++)
5111 if (strcmp (fname, deferred_plabels[i].name) == 0)
5114 /* If the deferred plabel list is empty, or this entry was not found
5115 on the list, create a new entry on the list. */
5116 if (deferred_plabels == NULL || i == n_deferred_plabels)
5118 const char *real_name;
5120 if (deferred_plabels == 0)
5121 deferred_plabels = (struct deferred_plabel *)
5122 ggc_alloc (sizeof (struct deferred_plabel));
5124 deferred_plabels = (struct deferred_plabel *)
5125 ggc_realloc (deferred_plabels,
5126 ((n_deferred_plabels + 1)
5127 * sizeof (struct deferred_plabel)));
5129 i = n_deferred_plabels++;
5130 deferred_plabels[i].internal_label = gen_label_rtx ();
5131 deferred_plabels[i].name = ggc_strdup (fname);
5133 /* Gross. We have just implicitly taken the address of this function,
5135 real_name = (*targetm.strip_name_encoding) (fname);
5136 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5139 return &deferred_plabels[i];
5143 output_deferred_plabels (void)
5146 /* If we have deferred plabels, then we need to switch into the data
5147 section and align it to a 4 byte boundary before we output the
5148 deferred plabels. */
5149 if (n_deferred_plabels)
5152 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5155 /* Now output the deferred plabels. */
5156 for (i = 0; i < n_deferred_plabels; i++)
5158 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5159 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5160 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5161 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5165 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5166 /* Initialize optabs to point to HPUX long double emulation routines. */
5168 pa_hpux_init_libfuncs (void)
5170 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5171 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5172 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5173 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5174 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5175 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5176 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5177 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5178 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5180 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5181 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5182 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5183 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5184 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5185 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5187 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5188 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5189 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5190 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5192 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5193 ? "__U_Qfcnvfxt_quad_to_sgl"
5194 : "_U_Qfcnvfxt_quad_to_sgl");
5195 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5196 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5197 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5199 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5200 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5204 /* HP's millicode routines mean something special to the assembler.
5205 Keep track of which ones we have used. */
5207 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5208 static void import_milli (enum millicodes);
5209 static char imported[(int) end1000];
5210 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5211 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5212 #define MILLI_START 10
5215 import_milli (enum millicodes code)
5217 char str[sizeof (import_string)];
5219 if (!imported[(int) code])
5221 imported[(int) code] = 1;
5222 strcpy (str, import_string);
5223 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5224 output_asm_insn (str, 0);
5228 /* The register constraints have put the operands and return value in
5229 the proper registers. */
5232 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5234 import_milli (mulI);
5235 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5238 /* Emit the rtl for doing a division by a constant. */
5240 /* Do magic division millicodes exist for this value? */
5241 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5244 /* We'll use an array to keep track of the magic millicodes and
5245 whether or not we've used them already. [n][0] is signed, [n][1] is
5248 static int div_milli[16][2];
5251 div_operand (rtx op, enum machine_mode mode)
5253 return (mode == SImode
5254 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5255 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5256 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5260 emit_hpdiv_const (rtx *operands, int unsignedp)
5262 if (GET_CODE (operands[2]) == CONST_INT
5263 && INTVAL (operands[2]) > 0
5264 && INTVAL (operands[2]) < 16
5265 && magic_milli[INTVAL (operands[2])])
5267 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5269 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5272 (PARALLEL, VOIDmode,
5273 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5274 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5276 gen_rtx_REG (SImode, 26),
5278 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5279 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5280 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5281 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5282 gen_rtx_CLOBBER (VOIDmode, ret))));
5283 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5290 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5294 /* If the divisor is a constant, try to use one of the special
5296 if (GET_CODE (operands[0]) == CONST_INT)
5298 static char buf[100];
5299 divisor = INTVAL (operands[0]);
5300 if (!div_milli[divisor][unsignedp])
5302 div_milli[divisor][unsignedp] = 1;
5304 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5306 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5310 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5311 INTVAL (operands[0]));
5312 return output_millicode_call (insn,
5313 gen_rtx_SYMBOL_REF (SImode, buf));
5317 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5318 INTVAL (operands[0]));
5319 return output_millicode_call (insn,
5320 gen_rtx_SYMBOL_REF (SImode, buf));
5323 /* Divisor isn't a special constant. */
5328 import_milli (divU);
5329 return output_millicode_call (insn,
5330 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5334 import_milli (divI);
5335 return output_millicode_call (insn,
5336 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5341 /* Output a $$rem millicode to do mod. */
5344 output_mod_insn (int unsignedp, rtx insn)
5348 import_milli (remU);
5349 return output_millicode_call (insn,
5350 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5354 import_milli (remI);
5355 return output_millicode_call (insn,
5356 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5361 output_arg_descriptor (rtx call_insn)
5363 const char *arg_regs[4];
5364 enum machine_mode arg_mode;
5366 int i, output_flag = 0;
5369 /* We neither need nor want argument location descriptors for the
5370 64bit runtime environment or the ELF32 environment. */
5371 if (TARGET_64BIT || TARGET_ELF32)
5374 for (i = 0; i < 4; i++)
5377 /* Specify explicitly that no argument relocations should take place
5378 if using the portable runtime calling conventions. */
5379 if (TARGET_PORTABLE_RUNTIME)
5381 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5386 if (GET_CODE (call_insn) != CALL_INSN)
5388 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5390 rtx use = XEXP (link, 0);
5392 if (! (GET_CODE (use) == USE
5393 && GET_CODE (XEXP (use, 0)) == REG
5394 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5397 arg_mode = GET_MODE (XEXP (use, 0));
5398 regno = REGNO (XEXP (use, 0));
5399 if (regno >= 23 && regno <= 26)
5401 arg_regs[26 - regno] = "GR";
5402 if (arg_mode == DImode)
5403 arg_regs[25 - regno] = "GR";
5405 else if (regno >= 32 && regno <= 39)
5407 if (arg_mode == SFmode)
5408 arg_regs[(regno - 32) / 2] = "FR";
5411 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5412 arg_regs[(regno - 34) / 2] = "FR";
5413 arg_regs[(regno - 34) / 2 + 1] = "FU";
5415 arg_regs[(regno - 34) / 2] = "FU";
5416 arg_regs[(regno - 34) / 2 + 1] = "FR";
5421 fputs ("\t.CALL ", asm_out_file);
5422 for (i = 0; i < 4; i++)
5427 fputc (',', asm_out_file);
5428 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5431 fputc ('\n', asm_out_file);
5434 /* Return the class of any secondary reload register that is needed to
5435 move IN into a register in class CLASS using mode MODE.
5437 Profiling has showed this routine and its descendants account for
5438 a significant amount of compile time (~7%). So it has been
5439 optimized to reduce redundant computations and eliminate useless
5442 It might be worthwhile to try and make this a leaf function too. */
5445 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5447 int regno, is_symbolic;
5449 /* Trying to load a constant into a FP register during PIC code
5450 generation will require %r1 as a scratch register. */
5452 && GET_MODE_CLASS (mode) == MODE_INT
5453 && FP_REG_CLASS_P (class)
5454 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5457 /* Profiling showed the PA port spends about 1.3% of its compilation
5458 time in true_regnum from calls inside secondary_reload_class. */
5460 if (GET_CODE (in) == REG)
5463 if (regno >= FIRST_PSEUDO_REGISTER)
5464 regno = true_regnum (in);
5466 else if (GET_CODE (in) == SUBREG)
5467 regno = true_regnum (in);
5471 /* If we have something like (mem (mem (...)), we can safely assume the
5472 inner MEM will end up in a general register after reloading, so there's
5473 no need for a secondary reload. */
5474 if (GET_CODE (in) == MEM
5475 && GET_CODE (XEXP (in, 0)) == MEM)
5478 /* Handle out of range displacement for integer mode loads/stores of
5480 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5481 && GET_MODE_CLASS (mode) == MODE_INT
5482 && FP_REG_CLASS_P (class))
5483 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5484 return GENERAL_REGS;
5486 /* A SAR<->FP register copy requires a secondary register (GPR) as
5487 well as secondary memory. */
5488 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5489 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5490 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5491 return GENERAL_REGS;
5493 if (GET_CODE (in) == HIGH)
5496 /* Profiling has showed GCC spends about 2.6% of its compilation
5497 time in symbolic_operand from calls inside secondary_reload_class.
5499 We use an inline copy and only compute its return value once to avoid
5501 switch (GET_CODE (in))
5511 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5512 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5513 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5523 && read_only_operand (in, VOIDmode))
5526 if (class != R1_REGS && is_symbolic)
5533 function_arg_padding (enum machine_mode mode, tree type)
5536 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5538 /* Return none if justification is not required. */
5540 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5541 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5544 /* The directions set here are ignored when a BLKmode argument larger
5545 than a word is placed in a register. Different code is used for
5546 the stack and registers. This makes it difficult to have a
5547 consistent data representation for both the stack and registers.
5548 For both runtimes, the justification and padding for arguments on
5549 the stack and in registers should be identical. */
5551 /* The 64-bit runtime specifies left justification for aggregates. */
5554 /* The 32-bit runtime architecture specifies right justification.
5555 When the argument is passed on the stack, the argument is padded
5556 with garbage on the left. The HP compiler pads with zeros. */
5560 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5567 /* Do what is necessary for `va_start'. We look at the current function
5568 to determine if stdargs or varargs is used and fill in an initial
5569 va_list. A pointer to this constructor is returned. */
5572 hppa_builtin_saveregs (void)
5575 tree fntype = TREE_TYPE (current_function_decl);
5576 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5577 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5578 != void_type_node)))
5579 ? UNITS_PER_WORD : 0);
5582 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5584 offset = current_function_arg_offset_rtx;
5590 /* Adjust for varargs/stdarg differences. */
5592 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5594 offset = current_function_arg_offset_rtx;
5596 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5597 from the incoming arg pointer and growing to larger addresses. */
5598 for (i = 26, off = -64; i >= 19; i--, off += 8)
5599 emit_move_insn (gen_rtx_MEM (word_mode,
5600 plus_constant (arg_pointer_rtx, off)),
5601 gen_rtx_REG (word_mode, i));
5603 /* The incoming args pointer points just beyond the flushback area;
5604 normally this is not a serious concern. However, when we are doing
5605 varargs/stdargs we want to make the arg pointer point to the start
5606 of the incoming argument area. */
5607 emit_move_insn (virtual_incoming_args_rtx,
5608 plus_constant (arg_pointer_rtx, -64));
5610 /* Now return a pointer to the first anonymous argument. */
5611 return copy_to_reg (expand_binop (Pmode, add_optab,
5612 virtual_incoming_args_rtx,
5613 offset, 0, 0, OPTAB_LIB_WIDEN));
5616 /* Store general registers on the stack. */
5617 dest = gen_rtx_MEM (BLKmode,
5618 plus_constant (current_function_internal_arg_pointer,
5620 set_mem_alias_set (dest, get_varargs_alias_set ());
5621 set_mem_align (dest, BITS_PER_WORD);
5622 move_block_from_reg (23, dest, 4);
5624 /* move_block_from_reg will emit code to store the argument registers
5625 individually as scalar stores.
5627 However, other insns may later load from the same addresses for
5628 a structure load (passing a struct to a varargs routine).
5630 The alias code assumes that such aliasing can never happen, so we
5631 have to keep memory referencing insns from moving up beyond the
5632 last argument register store. So we emit a blockage insn here. */
5633 emit_insn (gen_blockage ());
5635 return copy_to_reg (expand_binop (Pmode, add_optab,
5636 current_function_internal_arg_pointer,
5637 offset, 0, 0, OPTAB_LIB_WIDEN));
5641 hppa_va_start (tree valist, rtx nextarg)
5643 nextarg = expand_builtin_saveregs ();
5644 std_expand_builtin_va_start (valist, nextarg);
5648 hppa_va_arg (tree valist, tree type)
5650 HOST_WIDE_INT size = int_size_in_bytes (type);
5656 /* Every argument in PA64 is supposed to be passed by value
5657 (including large structs). However, as a GCC extension, we
5658 pass zero and variable sized arguments by reference. Empty
5659 structures are a GCC extension not supported by the HP
5660 compilers. Thus, passing them by reference isn't likely
5661 to conflict with the ABI. For variable sized arguments,
5662 GCC doesn't have the infrastructure to allocate these to
5665 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5667 if (size > UNITS_PER_WORD)
5669 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5670 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5671 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5672 build_int_2 (-2 * UNITS_PER_WORD, -1));
5673 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5674 TREE_SIDE_EFFECTS (t) = 1;
5675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5679 return std_expand_builtin_va_arg (valist, type);
5682 ptr = build_pointer_type (type);
5684 /* Args grow upward. */
5685 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5686 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5687 TREE_SIDE_EFFECTS (t) = 1;
5689 pptr = build_pointer_type (ptr);
5690 t = build1 (NOP_EXPR, pptr, t);
5691 TREE_SIDE_EFFECTS (t) = 1;
5693 t = build1 (INDIRECT_REF, ptr, t);
5694 TREE_SIDE_EFFECTS (t) = 1;
5697 else /* !TARGET_64BIT */
5699 ptr = build_pointer_type (type);
5701 /* "Large" and variable sized types are passed by reference. */
5702 if (size > 8 || size <= 0)
5704 /* Args grow downward. */
5705 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5706 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5707 TREE_SIDE_EFFECTS (t) = 1;
5709 pptr = build_pointer_type (ptr);
5710 t = build1 (NOP_EXPR, pptr, t);
5711 TREE_SIDE_EFFECTS (t) = 1;
5713 t = build1 (INDIRECT_REF, ptr, t);
5714 TREE_SIDE_EFFECTS (t) = 1;
5718 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5719 build_int_2 (-size, -1));
5721 /* Copied from va-pa.h, but we probably don't need to align to
5722 word size, since we generate and preserve that invariant. */
5723 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5724 build_int_2 ((size > 4 ? -8 : -4), -1));
5726 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5727 TREE_SIDE_EFFECTS (t) = 1;
5729 ofs = (8 - size) % 4;
5732 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
5733 build_int_2 (ofs, 0));
5734 TREE_SIDE_EFFECTS (t) = 1;
5737 t = build1 (NOP_EXPR, ptr, t);
5738 TREE_SIDE_EFFECTS (t) = 1;
5743 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5748 /* This routine handles all the normal conditional branch sequences we
5749 might need to generate. It handles compare immediate vs compare
5750 register, nullification of delay slots, varying length branches,
5751 negated branches, and all combinations of the above. It returns the
5752 output appropriate to emit the branch corresponding to all given
5756 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
5758 static char buf[100];
5762 /* A conditional branch to the following instruction (eg the delay slot)
5763 is asking for a disaster. This can happen when not optimizing and
5764 when jump optimization fails.
5766 While it is usually safe to emit nothing, this can fail if the
5767 preceding instruction is a nullified branch with an empty delay
5768 slot and the same branch target as this branch. We could check
5769 for this but jump optimization should eliminate nop jumps. It
5770 is always safe to emit a nop. */
5771 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5774 /* The doubleword form of the cmpib instruction doesn't have the LEU
5775 and GTU conditions while the cmpb instruction does. Since we accept
5776 zero for cmpb, we must ensure that we use cmpb for the comparison. */
5777 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
5778 operands[2] = gen_rtx_REG (DImode, 0);
5780 /* If this is a long branch with its delay slot unfilled, set `nullify'
5781 as it can nullify the delay slot and save a nop. */
5782 if (length == 8 && dbr_sequence_length () == 0)
5785 /* If this is a short forward conditional branch which did not get
5786 its delay slot filled, the delay slot can still be nullified. */
5787 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5788 nullify = forward_branch_p (insn);
5790 /* A forward branch over a single nullified insn can be done with a
5791 comclr instruction. This avoids a single cycle penalty due to
5792 mis-predicted branch if we fall through (branch not taken). */
5794 && next_real_insn (insn) != 0
5795 && get_attr_length (next_real_insn (insn)) == 4
5796 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5802 /* All short conditional branches except backwards with an unfilled
5806 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5808 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5809 if (GET_MODE (operands[1]) == DImode)
5812 strcat (buf, "%B3");
5814 strcat (buf, "%S3");
5816 strcat (buf, " %2,%r1,%%r0");
5818 strcat (buf, ",n %2,%r1,%0");
5820 strcat (buf, " %2,%r1,%0");
5823 /* All long conditionals. Note a short backward branch with an
5824 unfilled delay slot is treated just like a long backward branch
5825 with an unfilled delay slot. */
5827 /* Handle weird backwards branch with a filled delay slot
5828 with is nullified. */
5829 if (dbr_sequence_length () != 0
5830 && ! forward_branch_p (insn)
5833 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5834 if (GET_MODE (operands[1]) == DImode)
5837 strcat (buf, "%S3");
5839 strcat (buf, "%B3");
5840 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5842 /* Handle short backwards branch with an unfilled delay slot.
5843 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5844 taken and untaken branches. */
5845 else if (dbr_sequence_length () == 0
5846 && ! forward_branch_p (insn)
5847 && INSN_ADDRESSES_SET_P ()
5848 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5849 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5851 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5852 if (GET_MODE (operands[1]) == DImode)
5855 strcat (buf, "%B3 %2,%r1,%0%#");
5857 strcat (buf, "%S3 %2,%r1,%0%#");
5861 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5862 if (GET_MODE (operands[1]) == DImode)
5865 strcat (buf, "%S3");
5867 strcat (buf, "%B3");
5869 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5871 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5877 xoperands[0] = operands[0];
5878 xoperands[1] = operands[1];
5879 xoperands[2] = operands[2];
5880 xoperands[3] = operands[3];
5882 /* The reversed conditional branch must branch over one additional
5883 instruction if the delay slot is filled. If the delay slot
5884 is empty, the instruction after the reversed condition branch
5885 must be nullified. */
5886 nullify = dbr_sequence_length () == 0;
5887 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
5889 /* Create a reversed conditional branch which branches around
5890 the following insns. */
5891 if (GET_MODE (operands[1]) != DImode)
5897 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
5900 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
5906 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
5909 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
5918 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
5921 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
5927 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
5930 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
5934 output_asm_insn (buf, xoperands);
5935 return output_lbranch (operands[0], insn);
5943 /* This routine handles long unconditional branches that exceed the
5944 maximum range of a simple branch instruction. */
5947 output_lbranch (rtx dest, rtx insn)
5951 xoperands[0] = dest;
5953 /* First, free up the delay slot. */
5954 if (dbr_sequence_length () != 0)
5956 /* We can't handle a jump in the delay slot. */
5957 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
5960 final_scan_insn (NEXT_INSN (insn), asm_out_file,
5963 /* Now delete the delay insn. */
5964 PUT_CODE (NEXT_INSN (insn), NOTE);
5965 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5966 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5969 /* Output an insn to save %r1. The runtime documentation doesn't
5970 specify whether the "Clean Up" slot in the callers frame can
5971 be clobbered by the callee. It isn't copied by HP's builtin
5972 alloca, so this suggests that it can be clobbered if necessary.
5973 The "Static Link" location is copied by HP builtin alloca, so
5974 we avoid using it. Using the cleanup slot might be a problem
5975 if we have to interoperate with languages that pass cleanup
5976 information. However, it should be possible to handle these
5977 situations with GCC's asm feature.
5979 The "Current RP" slot is reserved for the called procedure, so
5980 we try to use it when we don't have a frame of our own. It's
5981 rather unlikely that we won't have a frame when we need to emit
5984 Really the way to go long term is a register scavenger; goto
5985 the target of the jump and find a register which we can use
5986 as a scratch to hold the value in %r1. Then, we wouldn't have
5987 to free up the delay slot or clobber a slot that may be needed
5988 for other purposes. */
5991 if (actual_fsize == 0 && !regs_ever_live[2])
5992 /* Use the return pointer slot in the frame marker. */
5993 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
5995 /* Use the slot at -40 in the frame marker since HP builtin
5996 alloca doesn't copy it. */
5997 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6001 if (actual_fsize == 0 && !regs_ever_live[2])
6002 /* Use the return pointer slot in the frame marker. */
6003 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6005 /* Use the "Clean Up" slot in the frame marker. In GCC,
6006 the only other use of this location is for copying a
6007 floating point double argument from a floating-point
6008 register to two general registers. The copy is done
6009 as an "atomic" operation when outputting a call, so it
6010 won't interfere with our using the location here. */
6011 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6014 if (TARGET_PORTABLE_RUNTIME)
6016 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6017 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6018 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6022 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6023 if (TARGET_SOM || !TARGET_GAS)
6025 xoperands[1] = gen_label_rtx ();
6026 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6027 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6028 CODE_LABEL_NUMBER (xoperands[1]));
6029 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6033 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6034 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6036 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6039 /* Now output a very long branch to the original target. */
6040 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6042 /* Now restore the value of %r1 in the delay slot. */
6045 if (actual_fsize == 0 && !regs_ever_live[2])
6046 return "ldd -16(%%r30),%%r1";
6048 return "ldd -40(%%r30),%%r1";
6052 if (actual_fsize == 0 && !regs_ever_live[2])
6053 return "ldw -20(%%r30),%%r1";
6055 return "ldw -12(%%r30),%%r1";
6059 /* This routine handles all the branch-on-bit conditional branch sequences we
6060 might need to generate. It handles nullification of delay slots,
6061 varying length branches, negated branches and all combinations of the
6062 above. It returns the appropriate output template to emit the branch. */
/* NOTE(review): this extract embeds original line numbers; jumps between
   consecutive embedded numbers mean source lines are missing here.  Code is
   left byte-identical; comments only added.
   Parameters: NULLIFY nonzero = delay slot annulled; LENGTH = branch length
   attribute in bytes; NEGATED inverts the bit test; WHICH selects which of
   operand %2/%3 is the taken target (inferred from the which/negated swaps
   below — confirm against the bb patterns in pa.md).  %0 is the register
   tested and %1 the bit position, per the "bb" templates built below.  */
6065 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6066 int negated, rtx insn, int which)
6068 static char buf[100];
6071 /* A conditional branch to the following instruction (eg the delay slot) is
6072 asking for a disaster. I do not think this can happen as this pattern
6073 is only used when optimizing; jump optimization should eliminate the
6074 jump. But be prepared just in case. */
6076 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6079 /* If this is a long branch with its delay slot unfilled, set `nullify'
6080 as it can nullify the delay slot and save a nop. */
6081 if (length == 8 && dbr_sequence_length () == 0)
6084 /* If this is a short forward conditional branch which did not get
6085 its delay slot filled, the delay slot can still be nullified. */
6086 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6087 nullify = forward_branch_p (insn);
6089 /* A forward branch over a single nullified insn can be done with a
6090 extrs instruction. This avoids a single cycle penalty due to
6091 mis-predicted branch if we fall through (branch not taken). */
6094 && next_real_insn (insn) != 0
6095 && get_attr_length (next_real_insn (insn)) == 4
6096 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6103 /* All short conditional branches except backwards with an unfilled
/* The "{a|b}" template syntax picks between pre-2.0 and PA 2.0 mnemonics
   at final-output time (standard pa.c convention).  */
6107 strcpy (buf, "{extrs,|extrw,s,}");
6109 strcpy (buf, "bb,");
6110 if (useskip && GET_MODE (operands[0]) == DImode)
6111 strcpy (buf, "extrd,s,*");
6112 else if (GET_MODE (operands[0]) == DImode)
6113 strcpy (buf, "bb,*");
6114 if ((which == 0 && negated)
6115 || (which == 1 && ! negated))
6120 strcat (buf, " %0,%1,1,%%r0");
6121 else if (nullify && negated)
6122 strcat (buf, ",n %0,%1,%3");
6123 else if (nullify && ! negated)
6124 strcat (buf, ",n %0,%1,%2");
6125 else if (! nullify && negated)
6126 strcat (buf, "%0,%1,%3");
6127 else if (! nullify && ! negated)
6128 strcat (buf, " %0,%1,%2");
6131 /* All long conditionals. Note a short backward branch with an
6132 unfilled delay slot is treated just like a long backward branch
6133 with an unfilled delay slot. */
6135 /* Handle weird backwards branch with a filled delay slot
6136 which is nullified. */
6137 if (dbr_sequence_length () != 0
6138 && ! forward_branch_p (insn)
6141 strcpy (buf, "bb,");
6142 if (GET_MODE (operands[0]) == DImode)
6144 if ((which == 0 && negated)
6145 || (which == 1 && ! negated))
6150 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6152 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6154 /* Handle short backwards branch with an unfilled delay slot.
6155 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6156 taken and untaken branches. */
6157 else if (dbr_sequence_length () == 0
6158 && ! forward_branch_p (insn)
6159 && INSN_ADDRESSES_SET_P ()
6160 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6161 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6163 strcpy (buf, "bb,");
6164 if (GET_MODE (operands[0]) == DImode)
6166 if ((which == 0 && negated)
6167 || (which == 1 && ! negated))
6172 strcat (buf, " %0,%1,%3%#");
6174 strcat (buf, " %0,%1,%2%#");
6178 strcpy (buf, "{extrs,|extrw,s,}");
6179 if (GET_MODE (operands[0]) == DImode)
6180 strcpy (buf, "extrd,s,*");
6181 if ((which == 0 && negated)
6182 || (which == 1 && ! negated))
6186 if (nullify && negated)
6187 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6188 else if (nullify && ! negated)
6189 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6191 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6193 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6203 /* This routine handles all the branch-on-variable-bit conditional branch
6204 sequences we might need to generate. It handles nullification of delay
6205 slots, varying length branches, negated branches and all combinations
6206 of the above. It returns the appropriate output template to emit the
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   Same parameter contract as output_bb above, except the bit position is
   the SAR register (%%sar in the PA 2.0 halves of the templates) rather
   than a constant — hence "branch on variable bit" (bvb).  */
6210 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6211 int negated, rtx insn, int which)
6213 static char buf[100];
6216 /* A conditional branch to the following instruction (eg the delay slot) is
6217 asking for a disaster. I do not think this can happen as this pattern
6218 is only used when optimizing; jump optimization should eliminate the
6219 jump. But be prepared just in case. */
6221 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6224 /* If this is a long branch with its delay slot unfilled, set `nullify'
6225 as it can nullify the delay slot and save a nop. */
6226 if (length == 8 && dbr_sequence_length () == 0)
6229 /* If this is a short forward conditional branch which did not get
6230 its delay slot filled, the delay slot can still be nullified. */
6231 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6232 nullify = forward_branch_p (insn);
6234 /* A forward branch over a single nullified insn can be done with a
6235 extrs instruction. This avoids a single cycle penalty due to
6236 mis-predicted branch if we fall through (branch not taken). */
6239 && next_real_insn (insn) != 0
6240 && get_attr_length (next_real_insn (insn)) == 4
6241 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6248 /* All short conditional branches except backwards with an unfilled
6252 strcpy (buf, "{vextrs,|extrw,s,}");
6254 strcpy (buf, "{bvb,|bb,}");
6255 if (useskip && GET_MODE (operands[0]) == DImode)
6256 strcpy (buf, "extrd,s,*");
6257 else if (GET_MODE (operands[0]) == DImode)
6258 strcpy (buf, "bb,*");
6259 if ((which == 0 && negated)
6260 || (which == 1 && ! negated))
6265 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6266 else if (nullify && negated)
6267 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6268 else if (nullify && ! negated)
6269 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6270 else if (! nullify && negated)
6271 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6272 else if (! nullify && ! negated)
6273 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6276 /* All long conditionals. Note a short backward branch with an
6277 unfilled delay slot is treated just like a long backward branch
6278 with an unfilled delay slot. */
6280 /* Handle weird backwards branch with a filled delay slot
6281 which is nullified. */
6282 if (dbr_sequence_length () != 0
6283 && ! forward_branch_p (insn)
6286 strcpy (buf, "{bvb,|bb,}");
6287 if (GET_MODE (operands[0]) == DImode)
6289 if ((which == 0 && negated)
6290 || (which == 1 && ! negated))
6295 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6297 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6299 /* Handle short backwards branch with an unfilled delay slot.
6300 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6301 taken and untaken branches. */
6302 else if (dbr_sequence_length () == 0
6303 && ! forward_branch_p (insn)
6304 && INSN_ADDRESSES_SET_P ()
6305 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6306 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6308 strcpy (buf, "{bvb,|bb,}");
6309 if (GET_MODE (operands[0]) == DImode)
6311 if ((which == 0 && negated)
6312 || (which == 1 && ! negated))
6317 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6319 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6323 strcpy (buf, "{vextrs,|extrw,s,}");
6324 if (GET_MODE (operands[0]) == DImode)
6325 strcpy (buf, "extrd,s,*");
6326 if ((which == 0 && negated)
6327 || (which == 1 && ! negated))
6331 if (nullify && negated)
6332 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6333 else if (nullify && ! negated)
6334 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6336 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6338 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6348 /* Return the output template for emitting a dbra type insn.
6350 Note it may perform some output operations on its own before
6351 returning the final output string. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   WHICH_ALTERNATIVE selects the constraint alternative: 0 = counter in a
   general register (addib), 1 = counter spilled in an FP register,
   2 = counter in memory (inferred from the templates in each arm).  */
6353 output_dbra (rtx *operands, rtx insn, int which_alternative)
6356 /* A conditional branch to the following instruction (eg the delay slot) is
6357 asking for a disaster. Be prepared! */
6359 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6361 if (which_alternative == 0)
6362 return "ldo %1(%0),%0";
6363 else if (which_alternative == 1)
6365 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6366 output_asm_insn ("ldw -16(%%r30),%4", operands);
6367 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6368 return "{fldws|fldw} -16(%%r30),%0";
6372 output_asm_insn ("ldw %0,%4", operands);
6373 return "ldo %1(%4),%4\n\tstw %4,%0";
6377 if (which_alternative == 0)
6379 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6380 int length = get_attr_length (insn);
6382 /* If this is a long branch with its delay slot unfilled, set `nullify'
6383 as it can nullify the delay slot and save a nop. */
6384 if (length == 8 && dbr_sequence_length () == 0)
6387 /* If this is a short forward conditional branch which did not get
6388 its delay slot filled, the delay slot can still be nullified. */
6389 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6390 nullify = forward_branch_p (insn);
6392 /* Handle short versions first. */
6393 if (length == 4 && nullify)
6394 return "addib,%C2,n %1,%0,%3";
6395 else if (length == 4 && ! nullify)
6396 return "addib,%C2 %1,%0,%3";
6397 else if (length == 8)
6399 /* Handle weird backwards branch with a filled delay slot
6400 which is nullified. */
6401 if (dbr_sequence_length () != 0
6402 && ! forward_branch_p (insn)
6404 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6405 /* Handle short backwards branch with an unfilled delay slot.
6406 Using a addb;nop rather than addi;bl saves 1 cycle for both
6407 taken and untaken branches. */
6408 else if (dbr_sequence_length () == 0
6409 && ! forward_branch_p (insn)
6410 && INSN_ADDRESSES_SET_P ()
6411 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6412 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6413 return "addib,%C2 %1,%0,%3%#";
6415 /* Handle normal cases. */
6417 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6419 return "addi,%N2 %1,%0,%0\n\tb %3";
6424 /* Deal with gross reload from FP register case. */
6425 else if (which_alternative == 1)
6427 /* Move loop counter from FP register to MEM then into a GR,
6428 increment the GR, store the GR into MEM, and finally reload
6429 the FP register from MEM from within the branch's delay slot. */
6430 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6432 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6433 if (get_attr_length (insn) == 24)
6434 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6436 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6438 /* Deal with gross reload from memory case. */
6441 /* Reload loop counter from memory, the store back to memory
6442 happens in the branch's delay slot. */
6443 output_asm_insn ("ldw %0,%4", operands);
6444 if (get_attr_length (insn) == 12)
6445 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6447 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6451 /* Return the output template for emitting a movb type insn.
6453 Note it may perform some output operations on its own before
6454 returning the final output string. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   (Header above previously said "dbra", a copy-paste from output_dbra;
   corrected to match the function.)  REVERSE_COMPARISON nonzero flips the
   condition code stored in operands[2] in place before output.  */
6456 output_movb (rtx *operands, rtx insn, int which_alternative,
6457 int reverse_comparison)
6460 /* A conditional branch to the following instruction (eg the delay slot) is
6461 asking for a disaster. Be prepared! */
6463 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6465 if (which_alternative == 0)
6466 return "copy %1,%0";
6467 else if (which_alternative == 1)
6469 output_asm_insn ("stw %1,-16(%%r30)", operands);
6470 return "{fldws|fldw} -16(%%r30),%0";
6472 else if (which_alternative == 2)
6478 /* Support the second variant. */
6479 if (reverse_comparison)
6480 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6482 if (which_alternative == 0)
6484 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6485 int length = get_attr_length (insn);
6487 /* If this is a long branch with its delay slot unfilled, set `nullify'
6488 as it can nullify the delay slot and save a nop. */
6489 if (length == 8 && dbr_sequence_length () == 0)
6492 /* If this is a short forward conditional branch which did not get
6493 its delay slot filled, the delay slot can still be nullified. */
6494 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6495 nullify = forward_branch_p (insn);
6497 /* Handle short versions first. */
6498 if (length == 4 && nullify)
6499 return "movb,%C2,n %1,%0,%3";
6500 else if (length == 4 && ! nullify)
6501 return "movb,%C2 %1,%0,%3";
6502 else if (length == 8)
6504 /* Handle weird backwards branch with a filled delay slot
6505 which is nullified. */
6506 if (dbr_sequence_length () != 0
6507 && ! forward_branch_p (insn)
6509 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6511 /* Handle short backwards branch with an unfilled delay slot.
6512 Using a movb;nop rather than or;bl saves 1 cycle for both
6513 taken and untaken branches. */
6514 else if (dbr_sequence_length () == 0
6515 && ! forward_branch_p (insn)
6516 && INSN_ADDRESSES_SET_P ()
6517 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6518 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6519 return "movb,%C2 %1,%0,%3%#";
6520 /* Handle normal cases. */
6522 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6524 return "or,%N2 %1,%%r0,%0\n\tb %3";
6529 /* Deal with gross reload from FP register case. */
6530 else if (which_alternative == 1)
6532 /* Move loop counter from FP register to MEM then into a GR,
6533 increment the GR, store the GR into MEM, and finally reload
6534 the FP register from MEM from within the branch's delay slot. */
6535 output_asm_insn ("stw %1,-16(%%r30)", operands);
6536 if (get_attr_length (insn) == 12)
6537 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6539 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6541 /* Deal with gross reload from memory case. */
6542 else if (which_alternative == 2)
6544 /* Reload loop counter from memory, the store back to memory
6545 happens in the branch's delay slot. */
6546 if (get_attr_length (insn) == 8)
6547 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6549 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6551 /* Handle SAR as a destination. */
6554 if (get_attr_length (insn) == 8)
6555 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6557 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6561 /* Copy any FP arguments in INSN into integer registers. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   Walks CALL_INSN_FUNCTION_USAGE looking for USEs of FP argument registers
   (hard regs 32..39) and emits asm that bounces each value through the
   stack slot at -16(%r30) into the corresponding general register, so an
   argument-relocation stub can pick them up (see the caller in
   output_call).  Emits text via output_asm_insn; no return value.  */
6563 copy_fp_args (rtx insn)
6568 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6570 int arg_mode, regno;
6571 rtx use = XEXP (link, 0);
/* Skip anything that is not a USE of a hard argument register.  */
6573 if (! (GET_CODE (use) == USE
6574 && GET_CODE (XEXP (use, 0)) == REG
6575 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6578 arg_mode = GET_MODE (XEXP (use, 0));
6579 regno = REGNO (XEXP (use, 0));
6581 /* Is it a floating point register? */
6582 if (regno >= 32 && regno <= 39)
6584 /* Copy the FP register into an integer register via memory. */
6585 if (arg_mode == SFmode)
6587 xoperands[0] = XEXP (use, 0);
/* SFmode: one word; target GR computed from the FP regno pairing.  */
6588 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6589 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6590 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* DFmode (presumably — the else arm's condition is in a missing line):
   two words, loaded into a GR pair via %1/%R1.  */
6594 xoperands[0] = XEXP (use, 0);
6595 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6596 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6597 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6598 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6604 /* Compute length of the FP argument copy sequence for INSN. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing (including the length accumulation and return).
   Code left byte-identical; comments only.  Mirrors copy_fp_args: it must
   return the byte length of exactly the asm that copy_fp_args emits, so
   the two functions must be kept in sync.  */
6606 length_fp_args (rtx insn)
6611 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6613 int arg_mode, regno;
6614 rtx use = XEXP (link, 0);
/* Same USE-of-argument-register filter as copy_fp_args.  */
6616 if (! (GET_CODE (use) == USE
6617 && GET_CODE (XEXP (use, 0)) == REG
6618 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6621 arg_mode = GET_MODE (XEXP (use, 0));
6622 regno = REGNO (XEXP (use, 0));
6624 /* Is it a floating point register? */
6625 if (regno >= 32 && regno <= 39)
6627 if (arg_mode == SFmode)
6637 /* Return the attribute length for the millicode call instruction INSN.
6638 The length must match the code generated by output_millicode_call.
6639 We include the delay slot in the returned length as it is better to
6640 over estimate the length than to under estimate it. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines (the actual returned lengths) are missing.  Code left
   byte-identical; comments only.  DISTANCE starts at -1 (== ULONG_MAX) so
   an unknown distance is treated as out of range.  */
6643 attr_length_millicode_call (rtx insn)
6645 unsigned long distance = -1;
6646 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6648 if (INSN_ADDRESSES_SET_P ())
6650 distance = (total + insn_current_reference_address (insn));
/* Unsigned wrap check: if adding the reference address wrapped past
   ULONG_MAX, fall back to "unknown" (handled in a missing line).  */
6651 if (distance < total)
6657 if (!TARGET_LONG_CALLS && distance < 7600000)
6662 else if (TARGET_PORTABLE_RUNTIME)
6666 if (!TARGET_LONG_CALLS && distance < 240000)
6669 if (TARGET_LONG_ABS_CALL && !flag_pic)
6676 /* INSN is a function call. It may have an unconditional jump
6679 CALL_DEST is the routine we are calling. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   Emits the asm for a millicode call (e.g. $$mulI, $$sh_func_adrs).  The
   return pointer for millicode is %r31 (32-bit) or %r2 (64-bit), set up in
   xoperands[2] below.  Lengths here must agree with
   attr_length_millicode_call.  */
6682 output_millicode_call (rtx insn, rtx call_dest)
6684 int attr_length = get_attr_length (insn);
6685 int seq_length = dbr_sequence_length ();
6690 xoperands[0] = call_dest;
6691 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6693 /* Handle the common case where we are sure that the branch will
6694 reach the beginning of the $CODE$ subspace. The within reach
6695 form of the $$sh_func_adrs call has a length of 28. Because
6696 it has an attribute type of multi, it never has a nonzero
6697 sequence length. The length of the $$sh_func_adrs is the same
6698 as certain out of reach PIC calls to other routines. */
6699 if (!TARGET_LONG_CALLS
6700 && ((seq_length == 0
6701 && (attr_length == 12
6702 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6703 || (seq_length != 0 && attr_length == 8)))
6705 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6711 /* It might seem that one insn could be saved by accessing
6712 the millicode function using the linkage table. However,
6713 this doesn't work in shared libraries and other dynamically
6714 loaded objects. Using a pc-relative sequence also avoids
6715 problems related to the implicit use of the gp register. */
6716 output_asm_insn ("b,l .+8,%%r1", xoperands);
6720 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6721 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
/* SOM / non-GAS path: use a local label and a symbol-difference reloc.  */
6725 xoperands[1] = gen_label_rtx ();
6726 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6727 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6728 CODE_LABEL_NUMBER (xoperands[1]));
6729 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6732 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6734 else if (TARGET_PORTABLE_RUNTIME)
6736 /* Pure portable runtime doesn't allow be/ble; we also don't
6737 have PIC support in the assembler/linker, so this sequence
6740 /* Get the address of our target into %r1. */
6741 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6742 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6744 /* Get our return address into %r31. */
6745 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6746 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6748 /* Jump to our target address in %r1. */
6749 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6753 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6755 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6757 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6761 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6762 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6764 if (TARGET_SOM || !TARGET_GAS)
6766 /* The HP assembler can generate relocations for the
6767 difference of two symbols. GAS can do this for a
6768 millicode symbol but not an arbitrary external
6769 symbol when generating SOM output. */
6770 xoperands[1] = gen_label_rtx ();
6771 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6772 CODE_LABEL_NUMBER (xoperands[1]));
6773 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6774 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6778 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6779 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6783 /* Jump to our target address in %r1. */
6784 output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* Unfilled delay slot needs an explicit nop.  */
6788 if (seq_length == 0)
6789 output_asm_insn ("nop", xoperands);
6791 /* We are done if there isn't a jump in the delay slot. */
6792 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6795 /* This call has an unconditional jump in its delay slot. */
6796 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6798 /* See if the return address can be adjusted. Use the containing
6799 sequence insn's address. */
6800 if (INSN_ADDRESSES_SET_P ())
6802 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6803 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6804 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
/* If the jump target is within 14-bit displacement range, fold the
   jump into the return path by biasing the return pointer.  */
6806 if (VAL_14_BITS_P (distance))
6808 xoperands[1] = gen_label_rtx ();
6809 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6810 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6811 CODE_LABEL_NUMBER (xoperands[1]));
6814 /* ??? This branch may not reach its target. */
6815 output_asm_insn ("nop\n\tb,n %0", xoperands);
6818 /* ??? This branch may not reach its target. */
6819 output_asm_insn ("nop\n\tb,n %0", xoperands);
6821 /* Delete the jump. */
/* The delay-slot jump was emitted above, so turn the insn into a
   deleted note rather than letting final output it again.  */
6822 PUT_CODE (NEXT_INSN (insn), NOTE);
6823 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6824 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6829 /* Return the attribute length of the call instruction INSN. The SIBCALL
6830 flag indicates whether INSN is a regular call or a sibling call. The
6831 length returned must be longer than the code actually generated by
6832 output_call. Since branch shortening is done before delay branch
6833 sequencing, there is no way to determine whether or not the delay
6834 slot will be filled during branch shortening. Even when the delay
6835 slot is filled, we may have to add a nop if the delay slot contains
6836 a branch that can't reach its target. Thus, we always have to include
6837 the delay slot in the length estimate. This used to be done in
6838 pa_adjust_insn_length but we do it here now as some sequences always
6839 fill the delay slot and we can save four bytes in the estimate for
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   The case analysis here must stay in sync with output_call below.  */
6843 attr_length_call (rtx insn, int sibcall)
6849 rtx pat = PATTERN (insn);
6850 unsigned long distance = -1;
6852 if (INSN_ADDRESSES_SET_P ())
6854 unsigned long total;
6856 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6857 distance = (total + insn_current_reference_address (insn));
/* Unsigned wrap means the distance is unknown; leave it "infinite".  */
6858 if (distance < total)
6862 /* Determine if this is a local call. */
6863 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
6864 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
6866 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
6868 call_decl = SYMBOL_REF_DECL (call_dest);
6869 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
6871 /* pc-relative branch. */
6872 if (!TARGET_LONG_CALLS
6873 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
6874 || distance < 240000))
6877 /* 64-bit plabel sequence. */
6878 else if (TARGET_64BIT && !local_call)
6879 length += sibcall ? 28 : 24;
6881 /* non-pic long absolute branch sequence. */
6882 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
6885 /* long pc-relative branch sequence. */
6886 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
6887 || (TARGET_64BIT && !TARGET_GAS)
6888 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
6892 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
6896 /* 32-bit plabel sequence. */
/* Indirect plabel calls must also count the FP-argument copy code.  */
6902 length += length_fp_args (insn);
6912 if (!TARGET_NO_SPACE_REGS)
6920 /* INSN is a function call. It may have an unconditional jump
6923 CALL_DEST is the routine we are calling. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so some
   source lines are missing.  Code left byte-identical; comments only.
   Emits the asm for a regular or sibling call; the sequence chosen must
   match the length computed by attr_length_call.  SIBCALL nonzero means
   no return address is needed (return pointer %r0 instead of %r2).  */
6926 output_call (rtx insn, rtx call_dest, int sibcall)
6928 int delay_insn_deleted = 0;
6929 int delay_slot_filled = 0;
6930 int seq_length = dbr_sequence_length ();
6931 tree call_decl = SYMBOL_REF_DECL (call_dest);
6932 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
6935 xoperands[0] = call_dest;
6937 /* Handle the common case where we're sure that the branch will reach
6938 the beginning of the "$CODE$" subspace. This is the beginning of
6939 the current function if we are in a named section. */
6940 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
6942 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6943 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
6947 if (TARGET_64BIT && !local_call)
6949 /* ??? As far as I can tell, the HP linker doesn't support the
6950 long pc-relative sequence described in the 64-bit runtime
6951 architecture. So, we use a slightly longer indirect call. */
6952 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
6954 xoperands[0] = p->internal_label;
6955 xoperands[1] = gen_label_rtx ();
6957 /* If this isn't a sibcall, we put the load of %r27 into the
6958 delay slot. We can't do this in a sibcall as we don't
6959 have a second call-clobbered scratch register available. */
6961 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6964 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6967 /* Now delete the delay insn. */
6968 PUT_CODE (NEXT_INSN (insn), NOTE);
6969 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6970 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6971 delay_insn_deleted = 1;
/* Load the function descriptor (plabel) address via the linkage table.  */
6974 output_asm_insn ("addil LT'%0,%%r27", xoperands);
6975 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
6976 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
/* Descriptor layout: entry point at offset 16, new gp at offset 24.  */
6980 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6981 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
6982 output_asm_insn ("bve (%%r1)", xoperands);
6986 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
6987 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
6988 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
6989 delay_slot_filled = 1;
6994 int indirect_call = 0;
6996 /* Emit a long call. There are several different sequences
6997 of increasing length and complexity. In most cases,
6998 they don't allow an instruction in the delay slot. */
6999 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7000 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7001 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7006 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7008 && (!TARGET_PA_20 || indirect_call))
7010 /* A non-jump insn in the delay slot. By definition we can
7011 emit this insn before the call (and in fact before argument
7013 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
7015 /* Now delete the delay insn. */
7016 PUT_CODE (NEXT_INSN (insn), NOTE);
7017 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7018 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7019 delay_insn_deleted = 1;
7022 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7024 /* This is the best sequence for making long calls in
7025 non-pic code. Unfortunately, GNU ld doesn't provide
7026 the stub needed for external calls, and GAS's support
7027 for this with the SOM linker is buggy. It is safe
7028 to use this for local calls. */
7029 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7031 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7035 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7038 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
/* ble leaves the return address in %r31; move it to %r2 in the
   delay slot so callers see the standard return pointer.  */
7040 output_asm_insn ("copy %%r31,%%r2", xoperands);
7041 delay_slot_filled = 1;
7046 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7047 || (TARGET_64BIT && !TARGET_GAS))
7049 /* The HP assembler and linker can handle relocations
7050 for the difference of two symbols. GAS and the HP
7051 linker can't do this when one of the symbols is
7053 xoperands[1] = gen_label_rtx ();
7054 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7055 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7056 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7057 CODE_LABEL_NUMBER (xoperands[1]));
7058 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7060 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7062 /* GAS currently can't generate the relocations that
7063 are needed for the SOM linker under HP-UX using this
7064 sequence. The GNU linker doesn't generate the stubs
7065 that are needed for external calls on TARGET_ELF32
7066 with this sequence. For now, we have to use a
7067 longer plabel sequence when using GAS. */
7068 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7069 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7071 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7076 /* Emit a long plabel-based call sequence. This is
7077 essentially an inline implementation of $$dyncall.
7078 We don't actually try to call $$dyncall as this is
7079 as difficult as calling the function itself. */
7080 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7082 xoperands[0] = p->internal_label;
7083 xoperands[1] = gen_label_rtx ();
7085 /* Since the call is indirect, FP arguments in registers
7086 need to be copied to the general registers. Then, the
7087 argument relocation stub will copy them back. */
7089 copy_fp_args (insn);
7093 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7094 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7095 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7099 output_asm_insn ("addil LR'%0-$global$,%%r27",
7101 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
/* If bit 30 of the plabel address is set this is a shared-library
   plabel; strip the low bits and load the real target and gp.  */
7105 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7106 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7107 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7108 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7110 if (!sibcall && !TARGET_PA_20)
7112 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7113 if (TARGET_NO_SPACE_REGS)
7114 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7116 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7123 output_asm_insn ("bve (%%r1)", xoperands);
7128 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7129 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7130 delay_slot_filled = 1;
7133 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
/* Without PA 2.0 bve we must set up the space register for the
   inter-space branch.  */
7138 if (!TARGET_NO_SPACE_REGS)
7139 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7144 if (TARGET_NO_SPACE_REGS)
7145 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7147 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7151 if (TARGET_NO_SPACE_REGS)
7152 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7154 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7157 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7159 output_asm_insn ("copy %%r31,%%r2", xoperands);
7160 delay_slot_filled = 1;
7167 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7168 output_asm_insn ("nop", xoperands);
7170 /* We are done if there isn't a jump in the delay slot. */
7172 || delay_insn_deleted
7173 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7176 /* A sibcall should never have a branch in the delay slot. */
7180 /* This call has an unconditional jump in its delay slot. */
7181 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7183 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7185 /* See if the return address can be adjusted. Use the containing
7186 sequence insn's address. */
7187 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7188 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7189 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
/* Within 14-bit range: fold the delay-slot jump into the return
   path by biasing %r2 so the callee returns to the jump target.  */
7191 if (VAL_14_BITS_P (distance))
7193 xoperands[1] = gen_label_rtx ();
7194 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7195 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7196 CODE_LABEL_NUMBER (xoperands[1]));
7199 output_asm_insn ("nop\n\tb,n %0", xoperands);
7202 output_asm_insn ("b,n %0", xoperands);
7204 /* Delete the jump. */
/* The delay-slot jump's effect was emitted above; neuter the insn so
   final doesn't output it a second time.  */
7205 PUT_CODE (NEXT_INSN (insn), NOTE);
7206 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7207 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7212 /* Return the attribute length of the indirect call instruction INSN.
7213 The length must match the code generated by output_indirect call.
7214 The returned length includes the delay slot. Currently, the delay
7215 slot of an indirect call sequence is not exposed and it is used by
7216 the sequence itself. */
/* NOTE(review): fragmentary extract — embedded line numbers jump, so the
   actual returned length values are in missing lines.  Code left
   byte-identical; comments only.  Must stay in sync with
   output_indirect_call, which dispatches on the value returned here.  */
7219 attr_length_indirect_call (rtx insn)
7221 unsigned long distance = -1;
7222 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7224 if (INSN_ADDRESSES_SET_P ())
7226 distance = (total + insn_current_reference_address (insn));
/* Unsigned wrap: distance unknown, treat as out of range.  */
7227 if (distance < total)
7234 if (TARGET_FAST_INDIRECT_CALLS
7235 || (!TARGET_PORTABLE_RUNTIME
7236 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7242 if (TARGET_PORTABLE_RUNTIME)
7245 /* Out of reach, can use ble. */
/* Emit the assembly for the indirect call INSN to CALL_DEST.  The sequence
   chosen must agree exactly with the length computed by
   attr_length_indirect_call above.  */
7250 output_indirect_call (rtx insn, rtx call_dest)
/* 64-bit case (presumably guarded by an elided TARGET_64BIT test): load the
   function address and static chain/DP from the function descriptor and
   branch through %r2.  */
7256       xoperands[0] = call_dest;
7257       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7258       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7262   /* First the special case for kernels, level 0 systems, etc.  */
7263   if (TARGET_FAST_INDIRECT_CALLS)
7264     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7266   /* Now the normal case -- we can reach $$dyncall directly or
7267      we're sure that we can get there via a long-branch stub.
7269      No need to check target flags as the length uniquely identifies
7270      the remaining cases.  */
7271   if (attr_length_indirect_call (insn) == 8)
7272     return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7274   /* Long millicode call, but we are not generating PIC or portable runtime
7276   if (attr_length_indirect_call (insn) == 12)
7277     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7279   /* Long millicode call for portable runtime.  */
7280   if (attr_length_indirect_call (insn) == 20)
7281     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7283   /* We need a long PIC call to $$dyncall.  */
7284   xoperands[0] = NULL_RTX;
/* Get the PC into %r1 so we can form a pc-relative address of $$dyncall.  */
7285   output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7286   if (TARGET_SOM || !TARGET_GAS)
/* SOM (or non-GAS) assemblers need an explicit local label to express the
   pc-relative difference.  */
7288       xoperands[0] = gen_label_rtx ();
7289       output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7290       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7291 					 CODE_LABEL_NUMBER (xoperands[0]));
7292       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
/* GAS understands the $PIC_pcrel$0 pseudo-label; the +4/+8 offsets account
   for the distance from the "bl .+8" above.  */
7296       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7297       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
/* blr sets up the return pointer in %r2, then branch to $$dyncall.  */
7300   output_asm_insn ("blr %%r0,%%r2", xoperands);
7301   output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7305 /* Return the total length of the save and restore instructions needed for
7306    the data linkage table pointer (i.e., the PIC register) across the call
7307    instruction INSN.  No-return calls do not require a save and restore.
7308    In addition, we may be able to avoid the save and restore for calls
7309    within the same translation unit.  */
7312 attr_length_save_restore_dltp (rtx insn)
/* A call marked no-return never comes back, so the DLTP need not be
   preserved; the zero-length return for this case is elided here.  */
7314   if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7320 /* In HPUX 8.0's shared library scheme, special relocations are needed
7321    for function labels if they might be passed to a function
7322    in a shared library (because shared libraries don't live in code
7323    space), and special magic is needed to construct their address.  */
/* Mark SYM as a function label by rewriting its name with an encoding
   prefix (built in NEWSTR); the prefix-writing lines are elided in this
   listing.  The new name is copied into GC-managed storage.  */
7326 hppa_encode_label (rtx sym)
7328   const char *str = XSTR (sym, 0);
/* +1 for the terminating NUL; the alloca below adds one more byte for the
   encoding character.  */
7329   int len = strlen (str) + 1;
7332   p = newstr = alloca (len + 1);
7336   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* TARGET_ENCODE_SECTION_INFO hook.  On the first encoding pass, flag
   text-space symbols and apply the HP-UX function-label encoding to
   function symbols (see hppa_encode_label above).  */
7340 pa_encode_section_info (tree decl, rtx rtl, int first)
7342   if (first && TEXT_SPACE_P (decl))
7344       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7345       if (TREE_CODE (decl) == FUNCTION_DECL)
7346 	hppa_encode_label (XEXP (rtl, 0));
7350 /* This is sort of inverse to pa_encode_section_info.  */
/* Skip a leading '@' (function-label marker) and/or '*' (user-label
   prefix) from STR; the return of the adjusted pointer is elided in this
   listing.  */
7353 pa_strip_name_encoding (const char *str)
7355   str += (*str == '@');
7356   str += (*str == '*');
/* Predicate: nonzero when OP is a SYMBOL_REF whose (encoded) name marks it
   as a function label.  MODE is ignored.  */
7361 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7363   return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7366 /* Returns 1 if OP is a function label involved in a simple addition
7367    with a constant.  Used to keep certain patterns from matching
7368    during instruction combination.  */
7370 is_function_label_plus_const (rtx op)
7372   /* Strip off any CONST.  */
7373   if (GET_CODE (op) == CONST)
/* (elided) ... the CONST wrapper is peeled off here before checking for
   PLUS (function-label, const_int).  */
7376   return (GET_CODE (op) == PLUS
7377 	  && function_label_operand (XEXP (op, 0), Pmode)
7378 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
7381 /* Output assembly code for a thunk to FUNCTION.  */
/* TARGET_ASM_OUTPUT_MI_THUNK hook.  Emits a this-adjusting thunk that adds
   DELTA to the first argument (%r26) and transfers control to FUNCTION.
   The many branches below choose the shortest branch sequence valid for
   the target configuration (SOM vs ELF, PIC, 64-bit, portable runtime).
   NOTE(review): this listing elides numerous lines (braces, else arms);
   comments describe only what is visible.  */
7384 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7385 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7388   const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7389   const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
/* Nonzero when DELTA fits in 14 bits, i.e. a single ldo can apply it in a
   branch delay slot.  */
7390   int val_14 = VAL_14_BITS_P (delta);
/* Sequence number used to generate unique LTHN labels for SOM PIC thunks.  */
7392   static unsigned int current_thunk_number;
7395   ASM_OUTPUT_LABEL (file, tname);
7396   fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7398   fname = (*targetm.strip_name_encoding) (fname);
7399   tname = (*targetm.strip_name_encoding) (tname);
7401   /* Output the thunk.  We know that the function is in the same
7402      translation unit (i.e., the same space) as the thunk, and that
7403      thunks are output after their method.  Thus, we don't need an
7404      external branch to reach the function.  With SOM and GAS,
7405      functions and thunks are effectively in different sections.
7406      Thus, we can always use a IA-relative branch and the linker
7407      will add a long branch stub if necessary.
7409      However, we have to be careful when generating PIC code on the
7410      SOM port to ensure that the sequence does not transfer to an
7411      import stub for the target function as this could clobber the
7412      return value saved at SP-24.  This would also apply to the
7413      32-bit linux port if the multi-space model is implemented.  */
7414   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7415        && !(flag_pic && TREE_PUBLIC (function))
7416        && (TARGET_GAS || last_address < 262132))
7417       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7418 	  && ((targetm.have_named_sections
7419 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
7420 	       /* The GNU 64-bit linker has rather poor stub management.
7421 		  So, we use a long branch from thunks that aren't in
7422 		  the same section as the target function.  */
7424 	       && (DECL_SECTION_NAME (thunk_fndecl)
7425 		   != DECL_SECTION_NAME (function)))
7426 	      || ((DECL_SECTION_NAME (thunk_fndecl)
7427 		   == DECL_SECTION_NAME (function))
7428 		  && last_address < 262132)))
7429 	  || (!targetm.have_named_sections && last_address < 262132))))
/* Short case: plain IA-relative branch, delta applied in the delay slot.  */
7433 	fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7434 		 "(%%r26),%%r26\n", fname, delta);
/* Delta too large for the delay slot: apply it with addil/ldo first.  */
7439 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7441 	  fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7442 		   "(%%r1),%%r26\n", fname, delta);
7446   else if (TARGET_64BIT)
7448       /* We only have one call-clobbered scratch register, so we can't
7449          make use of the delay slot if delta doesn't fit in 14 bits.  */
7451 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7452 		   ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7453 		   "(%%r1),%%r26\n", delta, delta);
/* Materialize the PC in %r1 and form a pc-relative address of FUNCTION.  */
7455       fprintf (file, "\tb,l .+8,%%r1\n");
7459 	  fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7460 	  fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
/* Non-GAS variant: thunk-relative offset; 8 or 16 depending on whether the
   delta adjustment occupied extra instructions before this point.  */
7464 	  int off = val_14 ? 8 : 16;
7465 	  fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7466 	  fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7471 	  fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7472 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
/* Delta already applied above; nullifying branch, empty delay slot.  */
7477 	fprintf (file, "\tbv,n %%r0(%%r1)\n");
7481   else if (TARGET_PORTABLE_RUNTIME)
/* Portable runtime: absolute address of FUNCTION built in %r22.  */
7483       fprintf (file, "\tldil L'%s,%%r1\n", fname);
7484       fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7488 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7489 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7494 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7496 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7497 	  fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7501   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7503       /* The function is accessible from outside this module.  The only
7504 	 way to avoid an import stub between the thunk and function is to
7505 	 call the function directly with an indirect sequence similar to
7506 	 that used by $$dyncall.  This is possible because $$dyncall acts
7507 	 as the import stub in an indirect call.  */
7510       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7511       lab = (*targetm.strip_name_encoding) (label);
/* Fetch the function pointer from the DLT, then dereference the PLABEL if
   bit 30 indicates one (the bb,>=,n skips the PLABEL handling).  */
7513       fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7514       fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7515       fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7516       fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7517       fprintf (file, "\tdepi 0,31,2,%%r22\n");
7518       fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7519       fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7522 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7528 	  fprintf (file, "\tbve (%%r22)\n\tldo ");
7533 	  if (TARGET_NO_SPACE_REGS)
7535 	      fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
/* Space registers live: must load and install the target's space id
   before the external branch.  */
7540 	      fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7541 	      fprintf (file, "\tmtsp %%r21,%%sr0\n");
7542 	      fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7548 	fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7550 	fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7555 	fprintf (file, "\tb,l .+8,%%r1\n");
7557 	fprintf (file, "\tbl .+8,%%r1\n");
7559       if (TARGET_SOM || !TARGET_GAS)
7561 	  fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7562 	  fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7566 	  fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7567 	  fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7572 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7573 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7578 	  fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7580 	  fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7581 	  fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7588 	fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
/* Absolute external branch through %r22.  */
7590       fprintf (file, "\tldil L'%s,%%r22\n", fname);
7591       fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7595 	fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7600 	fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7605   fprintf (file, "\t.EXIT\n\t.PROCEND\n");
/* For SOM PIC public functions, emit the word holding the PLABEL-style
   function address referenced by the LTHN label above.  */
7607   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7610       fprintf (file, "\t.align 4\n");
7611       ASM_OUTPUT_LABEL (file, label);
7612       fprintf (file, "\t.word P'%s\n", fname);
7613       function_section (thunk_fndecl);
7616   current_thunk_number++;
/* Round the thunk size up to the function alignment boundary and account
   for it in the running code-size totals used for branch-reach decisions.  */
7617   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7618 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7619   last_address += nbytes;
7620   update_total_code_bytes (nbytes);
7623 /* Only direct calls to static functions are allowed to be sibling (tail)
7626    This restriction is necessary because some linker generated stubs will
7627    store return pointers into rp' in some cases which might clobber a
7628    live value already in rp'.
7630    In a sibcall the current function and the target function share stack
7631    space.  Thus if the path to the current function and the path to the
7632    target function save a value in rp', they save the value into the
7633    same stack slot, which has undesirable consequences.
7635    Because of the deferred binding nature of shared libraries any function
7636    with external scope could be in a different load module and thus require
7637    rp' to be saved when calling that function.  So sibcall optimizations
7638    can only be safe for static function.
7640    Note that GCC never needs return value relocations, so we don't have to
7641    worry about static calls with return value relocations (which require
7644    It is safe to perform a sibcall optimization when the target function
7645    will never return.  */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook: DECL is the called function's decl
   (NULL for indirect calls).  */
7647 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7649   /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7650      single subspace mode and the call is not indirect.  As far as I know,
7651      there is no operating system support for the multiple subspace mode.
7652      It might be possible to support indirect calls if we didn't use
7653      $$dyncall (see the indirect sequence generated in output_call).  */
7655     return (decl != NULL_TREE);
7657   /* Sibcalls are not ok because the arg pointer register is not a fixed
7658      register.  This prevents the sibcall optimization from occurring.  In
7659      addition, there are problems with stub placement using GNU ld.  This
7660      is because a normal sibcall branch uses a 17-bit relocation while
7661      a regular call branch uses a 22-bit relocation.  As a result, more
7662      care needs to be taken in the placement of long-branch stubs.  */
/* General case: direct call to a non-public function only.  */
7667 	  && !TARGET_PORTABLE_RUNTIME
7668 	  && !TREE_PUBLIC (decl));
7671 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7672    use in fmpyadd instructions.  */
7674 fmpyaddoperands (rtx *operands)
7676   enum machine_mode mode = GET_MODE (operands[0]);
7678   /* Must be a floating point mode.  */
7679   if (mode != SFmode && mode != DFmode)
7682   /* All modes must be the same.  */
7683   if (! (mode == GET_MODE (operands[1])
7684 	 && mode == GET_MODE (operands[2])
7685 	 && mode == GET_MODE (operands[3])
7686 	 && mode == GET_MODE (operands[4])
7687 	 && mode == GET_MODE (operands[5])))
7690   /* All operands must be registers.  */
7691   if (! (GET_CODE (operands[1]) == REG
7692 	 && GET_CODE (operands[2]) == REG
7693 	 && GET_CODE (operands[3]) == REG
7694 	 && GET_CODE (operands[4]) == REG
7695 	 && GET_CODE (operands[5]) == REG))
7698   /* Only 2 real operands to the addition.  One of the input operands must
7699      be the same as the output operand.  */
7700   if (! rtx_equal_p (operands[3], operands[4])
7701       && ! rtx_equal_p (operands[3], operands[5]))
7704   /* Inout operand of add can not conflict with any operands from multiply.  */
7705   if (rtx_equal_p (operands[3], operands[0])
7706       || rtx_equal_p (operands[3], operands[1])
7707       || rtx_equal_p (operands[3], operands[2]))
7710   /* multiply can not feed into addition operands.  */
7711   if (rtx_equal_p (operands[4], operands[0])
7712       || rtx_equal_p (operands[5], operands[0]))
7715   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
/* (elided condition: presumably mode == SFmode && !TARGET_PA_20 --
   confirm against the full source.)  */
7717       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7718 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7719 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7720 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7721 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7722 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7725   /* Passed.  Operands are suitable for fmpyadd.  */
7729 #if !defined(USE_COLLECT2)
/* TARGET_ASM_CONSTRUCTOR hook when collect2 is not used: encode SYMBOL as a
   function label (needed for HP-UX shared libraries) and then emit it via
   the default .ctors / named-section / stabs mechanism.  */
7731 pa_asm_out_constructor (rtx symbol, int priority)
7733   if (!function_label_operand (symbol, VOIDmode))
7734     hppa_encode_label (symbol);
7736 #ifdef CTORS_SECTION_ASM_OP
7737   default_ctor_section_asm_out_constructor (symbol, priority);
7739 # ifdef TARGET_ASM_NAMED_SECTION
7740   default_named_section_asm_out_constructor (symbol, priority);
7742   default_stabs_asm_out_constructor (symbol, priority);
/* TARGET_ASM_DESTRUCTOR hook; mirror image of pa_asm_out_constructor for
   the .dtors list.  */
7748 pa_asm_out_destructor (rtx symbol, int priority)
7750   if (!function_label_operand (symbol, VOIDmode))
7751     hppa_encode_label (symbol);
7753 #ifdef DTORS_SECTION_ASM_OP
7754   default_dtor_section_asm_out_destructor (symbol, priority);
7756 # ifdef TARGET_ASM_NAMED_SECTION
7757   default_named_section_asm_out_destructor (symbol, priority);
7759   default_stabs_asm_out_destructor (symbol, priority);
7765 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7766    use in fmpysub instructions.  */
7768 fmpysuboperands (rtx *operands)
7770   enum machine_mode mode = GET_MODE (operands[0]);
7772   /* Must be a floating point mode.  */
7773   if (mode != SFmode && mode != DFmode)
7776   /* All modes must be the same.  */
7777   if (! (mode == GET_MODE (operands[1])
7778 	 && mode == GET_MODE (operands[2])
7779 	 && mode == GET_MODE (operands[3])
7780 	 && mode == GET_MODE (operands[4])
7781 	 && mode == GET_MODE (operands[5])))
7784   /* All operands must be registers.  */
7785   if (! (GET_CODE (operands[1]) == REG
7786 	 && GET_CODE (operands[2]) == REG
7787 	 && GET_CODE (operands[3]) == REG
7788 	 && GET_CODE (operands[4]) == REG
7789 	 && GET_CODE (operands[5]) == REG))
7792   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
7793      operation, so operands[4] must be the same as operand[3].  */
7794   if (! rtx_equal_p (operands[3], operands[4]))
7797   /* multiply can not feed into subtraction.  */
7798   if (rtx_equal_p (operands[5], operands[0]))
7801   /* Inout operand of sub can not conflict with any operands from multiply.  */
7802   if (rtx_equal_p (operands[3], operands[0])
7803       || rtx_equal_p (operands[3], operands[1])
7804       || rtx_equal_p (operands[3], operands[2]))
7807   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
/* (elided condition: presumably mode == SFmode && !TARGET_PA_20 --
   confirm against the full source.)  */
7809       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7810 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7811 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7812 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7813 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7814 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7817   /* Passed.  Operands are suitable for fmpysub.  */
/* Predicate: nonzero when OP is a PLUS, XOR, or IOR rtx.  MODE is ignored.  */
7822 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7824   return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
7825 	  || GET_CODE (op) == IOR);
7828 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
7829    constants for shadd instructions.  */
7831 shadd_constant_p (int val)
/* 2, 4, 8 correspond to the 1-, 2-, and 3-bit shifts shadd supports.  */
7833   if (val == 2 || val == 4 || val == 8)
7839 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8.  These are
7840    the valid constant for shadd instructions.  */
7842 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7844   return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
7847 /* Return 1 if OP is valid as a base register in a reg + reg address.  */
7850 basereg_operand (rtx op, enum machine_mode mode)
7852   /* cse will create some unscaled indexed addresses, however; it
7853      generally isn't a win on the PA, so avoid creating unscaled
7854      indexed addresses until after cse is finished.  */
7855   if (!cse_not_expected)
7858   /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
7859      we don't have to worry about the braindamaged implicit space
7860      register selection from the basereg.  */
7861   if (TARGET_NO_SPACE_REGS)
7862     return (GET_CODE (op) == REG);
7864   /* While it's always safe to index off the frame pointer, it's not
7865      always profitable, particularly when the frame pointer is being
7867   if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
/* Otherwise require a genuine register operand (an elided clause between
   these two lines also constrains the register).  */
7870   return (GET_CODE (op) == REG
7872 	  && register_operand (op, mode));
7875 /* Return 1 if this operand is anything other than a hard register.  */
7878 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7880   return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
7883 /* Return 1 if INSN branches forward.  Should be using insn_addresses
7884    to avoid walking through all the insns...  */
7886 forward_branch_p (rtx insn)
7888   rtx label = JUMP_LABEL (insn);
/* Walk forward from INSN (loop header elided); if we reach LABEL the
   branch target lies ahead of INSN.  */
7895       insn = NEXT_INSN (insn);
7898   return (insn == label);
7901 /* Return 1 if OP is an equality comparison, else return 0.  */
7903 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7905   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
7908 /* Return 1 if OP is an operator suitable for use in a movb instruction.  */
7910 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7912   return (GET_CODE (op) == EQ || GET_CODE (op) == NE
7913 	  || GET_CODE (op) == LT || GET_CODE (op) == GE);
7916 /* Return 1 if INSN is in the delay slot of a call instruction.  */
7918 jump_in_call_delay (rtx insn)
7921   if (GET_CODE (insn) != JUMP_INSN)
/* A filled delay slot shows up as a SEQUENCE two insns back; check that
   INSN is element 1 (the delay-slot position) of that SEQUENCE.  */
7924   if (PREV_INSN (insn)
7925       && PREV_INSN (PREV_INSN (insn))
7926       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
7928       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
7930       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
7931 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
7938 /* Output an unconditional move and branch insn.  */
/* LENGTH selects the template; short lengths use a single movb with the
   "true" (always) condition, longer ones split into two insns.  */
7941 output_parallel_movb (rtx *operands, int length)
7943   /* These are the cases in which we win.  */
7945     return "mov%I1b,tr %1,%0,%2";
7947   /* None of these cases wins, but they don't lose either.  */
7948   if (dbr_sequence_length () == 0)
7950       /* Nothing in the delay slot, fake it by putting the combined
7951 	 insn (the copy or add) in the delay slot of a bl.  */
7952       if (GET_CODE (operands[1]) == CONST_INT)
7953 	return "b %2\n\tldi %1,%0";
7955 	return "b %2\n\tcopy %1,%0";
7959       /* Something in the delay slot, but we've got a long branch.  */
7960       if (GET_CODE (operands[1]) == CONST_INT)
7961 	return "ldi %1,%0\n\tb %2";
7963 	return "copy %1,%0\n\tb %2";
7967 /* Output an unconditional add and branch insn.  */
7970 output_parallel_addb (rtx *operands, int length)
7972   /* To make life easy we want operand0 to be the shared input/output
7973      operand and operand1 to be the readonly operand.  */
7974   if (operands[0] == operands[1])
7975     operands[1] = operands[2];
7977   /* These are the cases in which we win.  */
/* Short case: one addb with the always-true condition.  */
7979     return "add%I1b,tr %1,%0,%3";
7981   /* None of these cases win, but they don't lose either.  */
7982   if (dbr_sequence_length () == 0)
7984       /* Nothing in the delay slot, fake it by putting the combined
7985 	 insn (the copy or add) in the delay slot of a bl.  */
7986       return "b %3\n\tadd%I1 %1,%0,%0";
7990       /* Something in the delay slot, but we've got a long branch.  */
7991       return "add%I1 %1,%0,%0\n\tb %3";
7995 /* Return nonzero if INSN (a jump insn) immediately follows a call
7996    to a named function.  This is used to avoid filling the delay slot
7997    of the jump since it can usually be eliminated by modifying RP in
7998    the delay slot of the call.  */
8001 following_call (rtx insn)
/* Only relevant when jumps are allowed in call delay slots at all.  */
8003   if (! TARGET_JUMP_IN_DELAY)
8006   /* Find the previous real insn, skipping NOTEs.  */
8007   insn = PREV_INSN (insn);
8008   while (insn && GET_CODE (insn) == NOTE)
8009     insn = PREV_INSN (insn);
8011   /* Check for CALL_INSNs and millicode calls.  */
/* Millicode calls are ordinary INSNs with type MILLI; exclude SEQUENCE /
   USE / CLOBBER patterns which have no insn type.  */
8013       && ((GET_CODE (insn) == CALL_INSN
8014 	   && get_attr_type (insn) != TYPE_DYNCALL)
8015 	  || (GET_CODE (insn) == INSN
8016 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
8017 	      && GET_CODE (PATTERN (insn)) != USE
8018 	      && GET_CODE (PATTERN (insn)) != CLOBBER
8019 	      && get_attr_type (insn) == TYPE_MILLI)))
8025 /* We use this hook to perform a PA specific optimization which is difficult
8026    to do in earlier passes.
8028    We want the delay slots of branches within jump tables to be filled.
8029    None of the compiler passes at the moment even has the notion that a
8030    PA jump table doesn't contain addresses, but instead contains actual
8033    Because we actually jump into the table, the addresses of each entry
8034    must stay constant in relation to the beginning of the table (which
8035    itself must stay constant relative to the instruction to jump into
8036    it).  I don't believe we can guarantee earlier passes of the compiler
8037    will adhere to those rules.
8039    So, late in the compilation process we find all the jump tables, and
8040    expand them into real code -- eg each entry in the jump table vector
8041    will get an appropriate label followed by a jump to the final target.
8043    Reorg and the final jump pass can then optimize these branches and
8044    fill their delay slots.  We end up with smaller, more efficient code.
8046    The jump instructions within the table are special; we must be able
8047    to identify them during assembly output (if the jumps don't get filled
8048    we need to emit a nop rather than nullifying the delay slot)).  We
8049    identify jumps in switch tables by using insns with the attribute
8050    type TYPE_BTABLE_BRANCH.
8052    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8053    insns.  This serves two purposes, first it prevents jump.c from
8054    noticing that the last N entries in the table jump to the instruction
8055    immediately after the table and deleting the jumps.  Second, those
8056    insns mark where we should emit .begin_brtab and .end_brtab directives
8057    when using GAS (allows for better link time optimizations).  */
/* (Machine-dependent reorg pass body; the function header line is elided
   in this listing.)  */
8064   remove_useless_addtr_insns (1);
/* The multi-output combining pass only pays off on pre-PA8000 cores.  */
8066   if (pa_cpu < PROCESSOR_8000)
8067     pa_combine_instructions ();
8070   /* This is fairly cheap, so always run it if optimizing.  */
8071   if (optimize > 0 && !TARGET_BIG_SWITCH)
8073       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
8074       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8076 	  rtx pattern, tmp, location, label;
8077 	  unsigned int length, i;
8079 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
8080 	  if (GET_CODE (insn) != JUMP_INSN
8081 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8082 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8085 	  /* Emit marker for the beginning of the branch table.  */
8086 	  emit_insn_before (gen_begin_brtab (), insn);
8088 	  pattern = PATTERN (insn);
8089 	  location = PREV_INSN (insn);
/* ADDR_DIFF_VEC keeps its labels in operand 1, ADDR_VEC in operand 0.  */
8090 	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8092 	  for (i = 0; i < length; i++)
8094 	      /* Emit a label before each jump to keep jump.c from
8095 		 removing this code.  */
8096 	      tmp = gen_label_rtx ();
8097 	      LABEL_NUSES (tmp) = 1;
8098 	      emit_label_after (tmp, location);
8099 	      location = NEXT_INSN (location);
8101 	      if (GET_CODE (pattern) == ADDR_VEC)
8102 		label = XEXP (XVECEXP (pattern, 0, i), 0);
8104 		label = XEXP (XVECEXP (pattern, 1, i), 0);
8106 	      tmp = gen_short_jump (label);
8108 	      /* Emit the jump itself.  */
8109 	      tmp = emit_jump_insn_after (tmp, location);
8110 	      JUMP_LABEL (tmp) = label;
8111 	      LABEL_NUSES (label)++;
8112 	      location = NEXT_INSN (location);
8114 	      /* Emit a BARRIER after the jump.  */
8115 	      emit_barrier_after (location);
8116 	      location = NEXT_INSN (location);
8119 	  /* Emit marker for the end of the branch table.  */
8120 	  emit_insn_before (gen_end_brtab (), location);
8121 	  location = NEXT_INSN (location);
8122 	  emit_barrier_after (location);
8124 	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
8130       /* Still need brtab marker insns.  FIXME: the presence of these
8131 	 markers disables output of the branch table to readonly memory,
8132 	 and any alignment directives that might be needed.  Possibly,
8133 	 the begin_brtab insn should be output before the label for the
8134 	 table.  This doesn matter at the moment since the tables are
8135 	 always output in the text section.  */
8136       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8138 	  /* Find an ADDR_VEC insn.  */
8139 	  if (GET_CODE (insn) != JUMP_INSN
8140 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8141 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8144 	  /* Now generate markers for the beginning and end of the
8146 	  emit_insn_before (gen_begin_brtab (), insn);
8147 	  emit_insn_after (gen_end_brtab (), insn);
8152 /* The PA has a number of odd instructions which can perform multiple
8153 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8154 it may be profitable to combine two instructions into one instruction
8155 with two outputs. It's not profitable PA2.0 machines because the
8156 two outputs would take two slots in the reorder buffers.
8158 This routine finds instructions which can be combined and combines
8159 them. We only support some of the potential combinations, and we
8160 only try common ways to find suitable instructions.
8162 * addb can add two registers or a register and a small integer
8163 and jump to a nearby (+-8k) location. Normally the jump to the
8164 nearby location is conditional on the result of the add, but by
8165 using the "true" condition we can make the jump unconditional.
8166 Thus addb can perform two independent operations in one insn.
8168 * movb is similar to addb in that it can perform a reg->reg
8169 or small immediate->reg copy and jump to a nearby (+-8k location).
8171 * fmpyadd and fmpysub can perform a FP multiply and either an
8172 FP add or FP sub if the operands of the multiply and add/sub are
8173 independent (there are other minor restrictions). Note both
8174 the fmpy and fadd/fsub can in theory move to better spots according
8175 to data dependencies, but for now we require the fmpy stay at a
8178 * Many of the memory operations can perform pre & post updates
8179 of index registers. GCC's pre/post increment/decrement addressing
8180 is far too simple to take advantage of all the possibilities. This
8181 pass may not be suitable since those insns may not be independent.
8183 * comclr can compare two ints or an int and a register, nullify
8184 the following instruction and zero some other register. This
8185 is more difficult to use as it's harder to find an insn which
8186 will generate a comclr than finding something like an unconditional
8187 branch. (conditional moves & long branches create comclr insns).
8189 * Most arithmetic operations can conditionally skip the next
8190 instruction. They can be viewed as "perform this operation
8191 and conditionally jump to this nearby location" (where nearby
8192 is an insns away). These are difficult to use due to the
8193 branch length restrictions. */
8196 pa_combine_instructions (void)
8200 /* This can get expensive since the basic algorithm is on the
8201 order of O(n^2) (or worse). Only do it for -O2 or higher
8202 levels of optimization. */
8206 /* Walk down the list of insns looking for "anchor" insns which
8207 may be combined with "floating" insns. As the name implies,
8208 "anchor" instructions don't move, while "floating" insns may
8210 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8211 new = make_insn_raw (new);
8213 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8215 enum attr_pa_combine_type anchor_attr;
8216 enum attr_pa_combine_type floater_attr;
8218 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8219 Also ignore any special USE insns. */
8220 if ((GET_CODE (anchor) != INSN
8221 && GET_CODE (anchor) != JUMP_INSN
8222 && GET_CODE (anchor) != CALL_INSN)
8223 || GET_CODE (PATTERN (anchor)) == USE
8224 || GET_CODE (PATTERN (anchor)) == CLOBBER
8225 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8226 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8229 anchor_attr = get_attr_pa_combine_type (anchor);
8230 /* See if anchor is an insn suitable for combination. */
8231 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8232 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8233 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8234 && ! forward_branch_p (anchor)))
8238 for (floater = PREV_INSN (anchor);
8240 floater = PREV_INSN (floater))
8242 if (GET_CODE (floater) == NOTE
8243 || (GET_CODE (floater) == INSN
8244 && (GET_CODE (PATTERN (floater)) == USE
8245 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8248 /* Anything except a regular INSN will stop our search. */
8249 if (GET_CODE (floater) != INSN
8250 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8251 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8257 /* See if FLOATER is suitable for combination with the
8259 floater_attr = get_attr_pa_combine_type (floater);
8260 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8261 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8262 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8263 && floater_attr == PA_COMBINE_TYPE_FMPY))
8265 /* If ANCHOR and FLOATER can be combined, then we're
8266 done with this pass. */
8267 if (pa_can_combine_p (new, anchor, floater, 0,
8268 SET_DEST (PATTERN (floater)),
8269 XEXP (SET_SRC (PATTERN (floater)), 0),
8270 XEXP (SET_SRC (PATTERN (floater)), 1)))
8274 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8275 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8277 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8279 if (pa_can_combine_p (new, anchor, floater, 0,
8280 SET_DEST (PATTERN (floater)),
8281 XEXP (SET_SRC (PATTERN (floater)), 0),
8282 XEXP (SET_SRC (PATTERN (floater)), 1)))
8287 if (pa_can_combine_p (new, anchor, floater, 0,
8288 SET_DEST (PATTERN (floater)),
8289 SET_SRC (PATTERN (floater)),
8290 SET_SRC (PATTERN (floater))))
8296 /* If we didn't find anything on the backwards scan try forwards. */
8298 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8299 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8301 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8303 if (GET_CODE (floater) == NOTE
8304 || (GET_CODE (floater) == INSN
8305 && (GET_CODE (PATTERN (floater)) == USE
8306 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8310 /* Anything except a regular INSN will stop our search. */
8311 if (GET_CODE (floater) != INSN
8312 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8313 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8319 /* See if FLOATER is suitable for combination with the
8321 floater_attr = get_attr_pa_combine_type (floater);
8322 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8323 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8324 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8325 && floater_attr == PA_COMBINE_TYPE_FMPY))
8327 /* If ANCHOR and FLOATER can be combined, then we're
8328 done with this pass. */
8329 if (pa_can_combine_p (new, anchor, floater, 1,
8330 SET_DEST (PATTERN (floater)),
8331 XEXP (SET_SRC (PATTERN (floater)),
8333 XEXP (SET_SRC (PATTERN (floater)),
8340 /* FLOATER will be nonzero if we found a suitable floating
8341 insn for combination with ANCHOR. */
8343 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8344 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8346 /* Emit the new instruction and delete the old anchor. */
8347 emit_insn_before (gen_rtx_PARALLEL
8349 gen_rtvec (2, PATTERN (anchor),
8350 PATTERN (floater))),
8353 PUT_CODE (anchor, NOTE);
8354 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8355 NOTE_SOURCE_FILE (anchor) = 0;
8357 /* Emit a special USE insn for FLOATER, then delete
8358 the floating insn. */
8359 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8360 delete_insn (floater);
8365 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8368 /* Emit the new_jump instruction and delete the old anchor. */
8370 = emit_jump_insn_before (gen_rtx_PARALLEL
8372 gen_rtvec (2, PATTERN (anchor),
8373 PATTERN (floater))),
8376 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8377 PUT_CODE (anchor, NOTE);
8378 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8379 NOTE_SOURCE_FILE (anchor) = 0;
8381 /* Emit a special USE insn for FLOATER, then delete
8382 the floating insn. */
8383 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8384 delete_insn (floater);
/* Decide whether ANCHOR and FLOATER may be fused into a single
   two-element PARALLEL insn.  NEW is a scratch PARALLEL that is reused
   to build each candidate pattern.  DEST is FLOATER's output operand;
   SRC1/SRC2 (declared on the continuation of this signature, elided
   from this extraction) are its inputs.  REVERSED records the
   direction of the scan that found FLOATER.  Returns nonzero when the
   fused pattern is recognized, its constraints are satisfied, and no
   operand is disturbed between the two insns.

   NOTE(review): several interior lines (the early `return 0;` arms and
   the computation of the START/END live-range endpoints from REVERSED)
   are missing from this extraction of the file -- confirm against the
   full source before modifying.  */
pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
  int insn_code_number;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || (extract_insn (new), ! constrain_operands (1)))
    /* (Rejection arm elided from this extraction.)  */

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */
  if (reg_used_between_p (dest, start, end))

  if (reg_set_between_p (src1, start, end))

  if (reg_set_between_p (src2, start, end))

  /* If we get here, then everything is good.  */
/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we can not consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
insn_refs_are_delayed (rtx insn)
  /* True only for a plain INSN -- SEQUENCE/USE/CLOBBER patterns are
     screened out first because get_attr_type cannot classify them --
     whose recognized type is a millicode call.  */
  return ((GET_CODE (insn) == INSN
	   && GET_CODE (PATTERN (insn)) != SEQUENCE
	   && GET_CODE (PATTERN (insn)) != USE
	   && GET_CODE (PATTERN (insn)) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI));
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else
   PROMOTE_FUNCTION_RETURN will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */
function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
  enum machine_mode valmode;

  /* Aggregates with a size less than or equal to 128 bits are returned
     in GR 28(-29).  They are left justified.  The pad bits are undefined.
     Larger aggregates are returned in memory.  */
  if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
      /* One doubleword register for sizes up to UNITS_PER_WORD, two
	 registers for anything larger (up to 128 bits).  */
      int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

      for (i = 0; i < ub; i++)
	  /* Pair GR 28+i with its byte offset in the aggregate (the
	     offset operand sits on a line elided from this extraction).  */
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (DImode, 28 + i),

      return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));

  /* Promote sub-word integers and pointers to word_mode, mirroring
     PROMOTE_MODE as the header comment requires.  */
  if ((INTEGRAL_TYPE_P (valtype)
       && TYPE_PRECISION (valtype) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
    /* NOTE(review): this assignment is the `else` arm of the test
       above; the `else` keyword is on a line elided from this
       extraction.  */
    valmode = TYPE_MODE (valtype);

  /* Hard-float REAL_TYPEs other than TFmode are returned in fr4
     (register 32).  */
  if (TREE_CODE (valtype) == REAL_TYPE
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  /* Everything else comes back in GR 28.  */
  return gen_rtx_REG (valmode, 28);
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed to into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.

   NOTE(review): this function is heavily elided in this extraction --
   several condition arms, the declarations of ARG_SIZE/ALIGNMENT/
   GPR_REG_BASE/FPR_REG_BASE/RETVAL, intermediate returns and braces are
   missing.  Consult the full source before editing.  */
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int named ATTRIBUTE_UNUSED)
  int max_arg_words = (TARGET_64BIT ? 8 : 4);

  /* A VOIDmode argument marks the end of the argument list.  */
  if (mode == VOIDmode)

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  FUNCTION_ARG_PARTIAL_NREGS will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)

      /* 64-bit: multi-word args must start on an even (doubleword
	 aligned) argument slot.  */
      alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	      /* The offset operand of each EXPR_LIST sits on a line
		 elided from this extraction.  */
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));

      /* 32-bit ABI from here on.  */
	  /* If the argument is larger than a word, then we know precisely
	     which registers we must use.  */
	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.

	     This is magic.  Normally, using a PARALLEL results in left
	     justified data on a big-endian target.  However, using a
	     single double-word register provides the required right
	     justification for 5 to 8 byte structures.  This has nothing
	     to do with the direction of padding specified for the argument.
	     It has to do with how the data is widened and shifted into
	     and from the register.

	     Aside from adding load_multiple and store_multiple patterns,
	     this is the only way that I have found to obtain right
	     justification of BLKmode data when it has a size greater
	     than one word.  Splitting the operation into two SImode loads
	     or returning a DImode REG results in left justified data.  */
	  if (mode == BLKmode)
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
	      return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));

	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of float, else we can just
	  pass it in integer registers.  */
       && FLOAT_MODE_P (mode)
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to the specify argument locations in static functions.  */
	  && FLOAT_MODE_P (mode)))
      /* Build a PARALLEL carrying the value in both an FP and a
	 general register (the surrounding gen_rtx_PARALLEL call and
	 the offset operands are elided from this extraction).  */
	 gen_rtx_EXPR_LIST (VOIDmode,
			    gen_rtx_REG (mode, fpr_reg_base),
	 gen_rtx_EXPR_LIST (VOIDmode,
			    gen_rtx_REG (mode, gpr_reg_base),

      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	  /* If the parameter is not a floating point parameter, then
	     it belongs in GPRs.  */
	  || !FLOAT_MODE_P (mode))
	retval = gen_rtx_REG (mode, gpr_reg_base);
	/* NOTE(review): `else` arm -- the `else` keyword is elided.  */
	retval = gen_rtx_REG (mode, fpr_reg_base);
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  It is currently called only for
   the 64-bit target.

   Otherwise returns the number of argument words still in registers
   when the argument is split between registers and stack.

   NOTE(review): the two `return 0;` statements under the first pair of
   tests are elided from this extraction, as is the `offset = 1;` arm of
   the alignment test.  */
function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			    tree type, int named ATTRIBUTE_UNUSED)
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  /* Multi-word arguments start on an even (doubleword aligned) slot;
     account for the skipped odd slot.  */
  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers. */
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack. */

  /* Arg is split: this many words remain in registers.  */
  return max_arg_words - cum->words - offset;
/* Return 1 if this is a comparison operator.  This allows the use of
   MATCH_OPERATOR to recognize all the branch insns.

   OP must match MODE (or MODE may be VOIDmode to accept any mode).
   Note that GEU and LTU are deliberately absent from the accepted
   codes -- presumably they are not directly encodable by the cmpib
   instruction; confirm against the PA-RISC architecture manual.  */
cmpib_comparison_operator (rtx op, enum machine_mode mode)
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_CODE (op) == EQ
	      || GET_CODE (op) == NE
	      || GET_CODE (op) == GT
	      || GET_CODE (op) == GTU
	      || GET_CODE (op) == GE
	      || GET_CODE (op) == LT
	      || GET_CODE (op) == LE
	      || GET_CODE (op) == LEU));
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc can
   not be placed in the read-only data section.

   Choose the output section for EXP given RELOC (nonzero when the
   initializer may require relocations).

   NOTE(review): the trailing arm of each condition (which presumably
   tests RELOC) and the final `else` that falls back to the writable
   data section are elided from this extraction.  */
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
  /* Non-volatile read-only variables with constant (or error-marked)
     initializers may live in read-only data.  */
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
    readonly_data_section ();
  /* Constants may too, except writable strings under
     -fwritable-strings.  */
  else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
	   && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
    readonly_data_section ();
8799 pa_globalize_label (FILE *stream, const char *name)
8801 /* We only handle DATA objects here, functions are globalized in
8802 ASM_DECLARE_FUNCTION_NAME. */
8803 if (! FUNCTION_NAME_P (name))
8805 fputs ("\t.EXPORT ", stream);
8806 assemble_name (stream, name);
8807 fputs (",DATA\n", stream);