1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
51 static int hppa_use_dfa_pipeline_interface (void);
53 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
54 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
57 hppa_use_dfa_pipeline_interface (void)
62 /* Return nonzero if there is a bypass for the output of
63 OUT_INSN and the fp store IN_INSN. */
65 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
67 enum machine_mode store_mode;
68 enum machine_mode other_mode;
71 if (recog_memoized (in_insn) < 0
72 || get_attr_type (in_insn) != TYPE_FPSTORE
73 || recog_memoized (out_insn) < 0)
76 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
78 set = single_set (out_insn);
82 other_mode = GET_MODE (SET_SRC (set));
84 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
88 #ifndef DO_FRAME_NOTES
89 #ifdef INCOMING_RETURN_ADDR_RTX
90 #define DO_FRAME_NOTES 1
92 #define DO_FRAME_NOTES 0
96 static void copy_reg_pointer (rtx, rtx);
97 static int hppa_address_cost (rtx);
98 static bool hppa_rtx_costs (rtx, int, int, int *);
99 static inline rtx force_mode (enum machine_mode, rtx);
100 static void pa_reorg (void);
101 static void pa_combine_instructions (void);
102 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
103 static int forward_branch_p (rtx);
104 static int shadd_constant_p (int);
105 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
106 static int compute_movstr_length (rtx);
107 static int compute_clrstr_length (rtx);
108 static bool pa_assemble_integer (rtx, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, HOST_WIDE_INT, int);
111 static void store_reg_modify (int, int, HOST_WIDE_INT);
112 static void load_reg (int, HOST_WIDE_INT, int);
113 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
114 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
115 static void update_total_code_bytes (int);
116 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static int pa_adjust_cost (rtx, rtx, rtx, int);
118 static int pa_adjust_priority (rtx, int);
119 static int pa_issue_rate (void);
120 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
122 static void pa_encode_section_info (tree, rtx, int);
123 static const char *pa_strip_name_encoding (const char *);
124 static bool pa_function_ok_for_sibcall (tree, tree);
125 static void pa_globalize_label (FILE *, const char *)
127 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree);
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor (rtx, int);
131 static void pa_asm_out_destructor (rtx, int);
133 static void pa_init_builtins (void);
134 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
135 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
136 static struct deferred_plabel *get_plabel (const char *)
138 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
139 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
140 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
141 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
142 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
144 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
145 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
147 static void output_deferred_plabels (void);
148 #ifdef HPUX_LONG_DOUBLE_LIBRARY
149 static void pa_hpux_init_libfuncs (void);
152 /* Save the operands last given to a compare for use when we
153 generate a scc or bcc insn. */
154 rtx hppa_compare_op0, hppa_compare_op1;
155 enum cmp_type hppa_branch_type;
157 /* Which cpu we are scheduling for. */
158 enum processor_type pa_cpu;
160 /* String to hold which cpu we are scheduling for. */
161 const char *pa_cpu_string;
163 /* Which architecture we are generating code for. */
164 enum architecture_type pa_arch;
166 /* String to hold which architecture we are generating code for. */
167 const char *pa_arch_string;
169 /* Counts for the number of callee-saved general and floating point
170 registers which were saved by the current function's prologue. */
171 static int gr_saved, fr_saved;
173 static rtx find_addr_reg (rtx);
175 /* Keep track of the number of bytes we have output in the CODE subspace
176 during this compilation so we'll know when to emit inline long-calls. */
177 unsigned long total_code_bytes;
179 /* The last address of the previous function plus the number of bytes in
180 associated thunks that have been output. This is used to determine if
181 a thunk can use an IA-relative branch to reach its target function. */
182 static int last_address;
184 /* Variables to handle plabels that we discover are necessary at assembly
185 output time. They are output after the current function. */
186 struct deferred_plabel GTY(())
191 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
193 static size_t n_deferred_plabels = 0;
196 /* Initialize the GCC target structure. */
198 #undef TARGET_ASM_ALIGNED_HI_OP
199 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
200 #undef TARGET_ASM_ALIGNED_SI_OP
201 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
202 #undef TARGET_ASM_ALIGNED_DI_OP
203 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
204 #undef TARGET_ASM_UNALIGNED_HI_OP
205 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
206 #undef TARGET_ASM_UNALIGNED_SI_OP
207 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
208 #undef TARGET_ASM_UNALIGNED_DI_OP
209 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER pa_assemble_integer
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
215 #undef TARGET_ASM_FUNCTION_EPILOGUE
216 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
218 #undef TARGET_SCHED_ADJUST_COST
219 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
220 #undef TARGET_SCHED_ADJUST_PRIORITY
221 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
222 #undef TARGET_SCHED_ISSUE_RATE
223 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
225 #undef TARGET_ENCODE_SECTION_INFO
226 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
227 #undef TARGET_STRIP_NAME_ENCODING
228 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
230 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
231 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
233 #undef TARGET_ASM_OUTPUT_MI_THUNK
234 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
235 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
236 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
238 #undef TARGET_ASM_FILE_END
239 #define TARGET_ASM_FILE_END output_deferred_plabels
241 #if !defined(USE_COLLECT2)
242 #undef TARGET_ASM_CONSTRUCTOR
243 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
244 #undef TARGET_ASM_DESTRUCTOR
245 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
248 #undef TARGET_INIT_BUILTINS
249 #define TARGET_INIT_BUILTINS pa_init_builtins
251 #undef TARGET_RTX_COSTS
252 #define TARGET_RTX_COSTS hppa_rtx_costs
253 #undef TARGET_ADDRESS_COST
254 #define TARGET_ADDRESS_COST hppa_address_cost
256 #undef TARGET_MACHINE_DEPENDENT_REORG
257 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
259 #ifdef HPUX_LONG_DOUBLE_LIBRARY
260 #undef TARGET_INIT_LIBFUNCS
261 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
264 struct gcc_target targetm = TARGET_INITIALIZER;
267 override_options (void)
269 if (pa_cpu_string == NULL)
270 pa_cpu_string = TARGET_SCHED_DEFAULT;
272 if (! strcmp (pa_cpu_string, "8000"))
274 pa_cpu_string = "8000";
275 pa_cpu = PROCESSOR_8000;
277 else if (! strcmp (pa_cpu_string, "7100"))
279 pa_cpu_string = "7100";
280 pa_cpu = PROCESSOR_7100;
282 else if (! strcmp (pa_cpu_string, "700"))
284 pa_cpu_string = "700";
285 pa_cpu = PROCESSOR_700;
287 else if (! strcmp (pa_cpu_string, "7100LC"))
289 pa_cpu_string = "7100LC";
290 pa_cpu = PROCESSOR_7100LC;
292 else if (! strcmp (pa_cpu_string, "7200"))
294 pa_cpu_string = "7200";
295 pa_cpu = PROCESSOR_7200;
297 else if (! strcmp (pa_cpu_string, "7300"))
299 pa_cpu_string = "7300";
300 pa_cpu = PROCESSOR_7300;
304 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
307 /* Set the instruction set architecture. */
308 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
310 pa_arch_string = "1.0";
311 pa_arch = ARCHITECTURE_10;
312 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
314 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
316 pa_arch_string = "1.1";
317 pa_arch = ARCHITECTURE_11;
318 target_flags &= ~MASK_PA_20;
319 target_flags |= MASK_PA_11;
321 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
323 pa_arch_string = "2.0";
324 pa_arch = ARCHITECTURE_20;
325 target_flags |= MASK_PA_11 | MASK_PA_20;
327 else if (pa_arch_string)
329 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
332 /* Unconditional branches in the delay slot are not compatible with dwarf2
333 call frame information. There is no benefit in using this optimization
334 on PA8000 and later processors. */
335 if (pa_cpu >= PROCESSOR_8000
336 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
337 || flag_unwind_tables)
338 target_flags &= ~MASK_JUMP_IN_DELAY;
340 if (flag_pic && TARGET_PORTABLE_RUNTIME)
342 warning ("PIC code generation is not supported in the portable runtime model\n");
345 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
347 warning ("PIC code generation is not compatible with fast indirect calls\n");
350 if (! TARGET_GAS && write_symbols != NO_DEBUG)
352 warning ("-g is only supported when using GAS on this processor,");
353 warning ("-g option disabled");
354 write_symbols = NO_DEBUG;
357 /* We only support the "big PIC" model now. And we always generate PIC
358 code when in 64bit mode. */
359 if (flag_pic == 1 || TARGET_64BIT)
362 /* We can't guarantee that .dword is available for 32-bit targets. */
363 if (UNITS_PER_WORD == 4)
364 targetm.asm_out.aligned_op.di = NULL;
366 /* The unaligned ops are only available when using GAS. */
369 targetm.asm_out.unaligned_op.hi = NULL;
370 targetm.asm_out.unaligned_op.si = NULL;
371 targetm.asm_out.unaligned_op.di = NULL;
376 pa_init_builtins (void)
378 #ifdef DONT_HAVE_FPUTC_UNLOCKED
379 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
380 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
384 /* If FROM is a probable pointer register, mark TO as a probable
385 pointer register with the same pointer alignment as FROM. */
388 copy_reg_pointer (rtx to, rtx from)
390 if (REG_POINTER (from))
391 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
394 /* Return nonzero only if OP is a register of mode MODE,
397 reg_or_0_operand (rtx op, enum machine_mode mode)
399 return (op == CONST0_RTX (mode) || register_operand (op, mode));
402 /* Return nonzero if OP is suitable for use in a call to a named
405 For 2.5 try to eliminate either call_operand_address or
406 function_label_operand, they perform very similar functions. */
408 call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
410 return (GET_MODE (op) == word_mode
411 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
414 /* Return 1 if X contains a symbolic expression. We know these
415 expressions will have one of a few well defined forms, so
416 we need only check those forms. */
418 symbolic_expression_p (rtx x)
421 /* Strip off any HIGH. */
422 if (GET_CODE (x) == HIGH)
425 return (symbolic_operand (x, VOIDmode));
429 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
431 switch (GET_CODE (op))
438 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
439 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
440 && GET_CODE (XEXP (op, 1)) == CONST_INT);
446 /* Return truth value of statement that OP is a symbolic memory
447 operand of mode MODE. */
450 symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
452 if (GET_CODE (op) == SUBREG)
453 op = SUBREG_REG (op);
454 if (GET_CODE (op) != MEM)
457 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
458 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
461 /* Return 1 if the operand is either a register, zero, or a memory operand
462 that is not symbolic. */
465 reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
467 if (register_operand (op, mode))
470 if (op == CONST0_RTX (mode))
473 if (GET_CODE (op) == SUBREG)
474 op = SUBREG_REG (op);
476 if (GET_CODE (op) != MEM)
479 /* Until problems with management of the REG_POINTER flag are resolved,
480 we need to delay creating move insns with unscaled indexed addresses
481 until CSE is not expected. */
482 if (!TARGET_NO_SPACE_REGS
484 && GET_CODE (XEXP (op, 0)) == PLUS
485 && REG_P (XEXP (XEXP (op, 0), 0))
486 && REG_P (XEXP (XEXP (op, 0), 1)))
489 return (!symbolic_memory_operand (op, mode)
490 && memory_address_p (mode, XEXP (op, 0)));
493 /* Return 1 if the operand is a register operand or a non-symbolic memory
494 operand after reload. This predicate is used for branch patterns that
495 internally handle register reloading. We need to accept non-symbolic
496 memory operands after reload to ensure that the pattern is still valid
497 if reload didn't find a hard register for the operand. */
500 reg_before_reload_operand (rtx op, enum machine_mode mode)
502 /* Don't accept a SUBREG since it will need a reload. */
503 if (GET_CODE (op) == SUBREG)
506 if (register_operand (op, mode))
510 && memory_operand (op, mode)
511 && !symbolic_memory_operand (op, mode))
517 /* Accept any constant that can be moved in one instruction into a
520 cint_ok_for_move (HOST_WIDE_INT intval)
522 /* OK if ldo, ldil, or zdepi, can be used. */
523 return (CONST_OK_FOR_LETTER_P (intval, 'J')
524 || CONST_OK_FOR_LETTER_P (intval, 'N')
525 || CONST_OK_FOR_LETTER_P (intval, 'K'));
528 /* Return 1 iff OP is an indexed memory operand. */
530 indexed_memory_operand (rtx op, enum machine_mode mode)
532 if (GET_MODE (op) != mode)
535 /* Before reload, a (SUBREG (MEM...)) forces reloading into a register. */
536 if (reload_completed && GET_CODE (op) == SUBREG)
537 op = SUBREG_REG (op);
539 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
544 return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
547 /* Accept anything that can be used as a destination operand for a
548 move instruction. We don't accept indexed memory operands since
549 they are supported only for floating point stores. */
551 move_dest_operand (rtx op, enum machine_mode mode)
553 if (register_operand (op, mode))
556 if (GET_MODE (op) != mode)
559 if (GET_CODE (op) == SUBREG)
560 op = SUBREG_REG (op);
562 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
567 return (memory_address_p (mode, op)
568 && !IS_INDEX_ADDR_P (op)
569 && !IS_LO_SUM_DLT_ADDR_P (op));
572 /* Accept anything that can be used as a source operand for a move
575 move_src_operand (rtx op, enum machine_mode mode)
577 if (register_operand (op, mode))
580 if (GET_CODE (op) == CONSTANT_P_RTX)
583 if (GET_CODE (op) == CONST_INT)
584 return cint_ok_for_move (INTVAL (op));
586 if (GET_MODE (op) != mode)
589 if (GET_CODE (op) == SUBREG)
590 op = SUBREG_REG (op);
592 if (GET_CODE (op) != MEM)
595 /* Until problems with management of the REG_POINTER flag are resolved,
596 we need to delay creating move insns with unscaled indexed addresses
597 until CSE is not expected. */
598 if (!TARGET_NO_SPACE_REGS
600 && GET_CODE (XEXP (op, 0)) == PLUS
601 && REG_P (XEXP (XEXP (op, 0), 0))
602 && REG_P (XEXP (XEXP (op, 0), 1)))
605 return memory_address_p (mode, XEXP (op, 0));
608 /* Accept REG and any CONST_INT that can be moved in one instruction into a
611 reg_or_cint_move_operand (rtx op, enum machine_mode mode)
613 if (register_operand (op, mode))
616 return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
620 pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
625 switch (GET_CODE (op))
631 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
632 && GET_CODE (XEXP (op, 1)) == CONST_INT);
639 fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
641 return reg_renumber && FP_REG_P (op);
646 /* Return truth value of whether OP can be used as an operand in a
647 three operand arithmetic insn that accepts registers of mode MODE
648 or 14-bit signed integers. */
650 arith_operand (rtx op, enum machine_mode mode)
652 return (register_operand (op, mode)
653 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
656 /* Return truth value of whether OP can be used as an operand in a
657 three operand arithmetic insn that accepts registers of mode MODE
658 or 11-bit signed integers. */
660 arith11_operand (rtx op, enum machine_mode mode)
662 return (register_operand (op, mode)
663 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
666 /* Return truth value of whether OP can be used as an operand in a
669 adddi3_operand (rtx op, enum machine_mode mode)
671 return (register_operand (op, mode)
672 || (GET_CODE (op) == CONST_INT
673 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
676 /* A constant integer suitable for use in a PRE_MODIFY memory
679 pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
681 return (GET_CODE (op) == CONST_INT
682 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
685 /* A constant integer suitable for use in a POST_MODIFY memory
688 post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
690 return (GET_CODE (op) == CONST_INT
691 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
695 arith_double_operand (rtx op, enum machine_mode mode)
697 return (register_operand (op, mode)
698 || (GET_CODE (op) == CONST_DOUBLE
699 && GET_MODE (op) == mode
700 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
701 && ((CONST_DOUBLE_HIGH (op) >= 0)
702 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
705 /* Return truth value of whether OP is an integer which fits the
706 range constraining immediate operands in three-address insns, or
707 is an integer register. */
710 ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
712 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
713 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
716 /* Return nonzero if OP is an integer register, else return zero. */
718 ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
720 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
723 /* Return truth value of whether OP is an integer which fits the
724 range constraining immediate operands in three-address insns. */
727 int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
729 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
733 uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
735 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
739 int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
741 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
745 uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
747 #if HOST_BITS_PER_WIDE_INT > 32
748 /* All allowed constants will fit a CONST_INT. */
749 return (GET_CODE (op) == CONST_INT
750 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
752 return (GET_CODE (op) == CONST_INT
753 || (GET_CODE (op) == CONST_DOUBLE
754 && CONST_DOUBLE_HIGH (op) == 0));
759 arith5_operand (rtx op, enum machine_mode mode)
761 return register_operand (op, mode) || int5_operand (op, mode);
764 /* True iff zdepi can be used to generate this CONST_INT.
765 zdepi first sign extends a 5 bit signed number to a given field
766 length, then places this field anywhere in a zero. */
768 zdepi_cint_p (unsigned HOST_WIDE_INT x)
770 unsigned HOST_WIDE_INT lsb_mask, t;
772 /* This might not be obvious, but it's at least fast.
773 This function is critical; we don't have the time loops would take. */
775 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
776 /* Return true iff t is a power of two. */
777 return ((t & (t - 1)) == 0);
780 /* True iff depi or extru can be used to compute (reg & mask).
781 Accept bit pattern like these:
786 and_mask_p (unsigned HOST_WIDE_INT mask)
789 mask += mask & -mask;
790 return (mask & (mask - 1)) == 0;
793 /* True iff depi or extru can be used to compute (reg & OP). */
795 and_operand (rtx op, enum machine_mode mode)
797 return (register_operand (op, mode)
798 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
801 /* True iff depi can be used to compute (reg | MASK). */
803 ior_mask_p (unsigned HOST_WIDE_INT mask)
805 mask += mask & -mask;
806 return (mask & (mask - 1)) == 0;
809 /* True iff depi can be used to compute (reg | OP). */
811 ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
813 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
817 lhs_lshift_operand (rtx op, enum machine_mode mode)
819 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
822 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
823 Such values can be the left hand side x in (x << r), using the zvdepi
826 lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
828 unsigned HOST_WIDE_INT x;
829 if (GET_CODE (op) != CONST_INT)
831 x = INTVAL (op) >> 4;
832 return (x & (x + 1)) == 0;
836 arith32_operand (rtx op, enum machine_mode mode)
838 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
842 pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
844 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
847 /* Legitimize PIC addresses. If the address is already
848 position-independent, we return ORIG. Newly generated
849 position-independent addresses go to REG. If we need more
850 than one register, we lose. */
853 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
857 /* Labels need special handling. */
858 if (pic_label_operand (orig, mode))
860 /* We do not want to go through the movXX expanders here since that
861 would create recursion.
863 Nor do we really want to call a generator for a named pattern
864 since that requires multiple patterns if we want to support
867 So instead we just emit the raw set, which avoids the movXX
868 expanders completely. */
869 mark_reg_pointer (reg, BITS_PER_UNIT);
870 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
871 current_function_uses_pic_offset_table = 1;
874 if (GET_CODE (orig) == SYMBOL_REF)
881 /* Before reload, allocate a temporary register for the intermediate
882 result. This allows the sequence to be deleted when the final
883 result is unused and the insns are trivially dead. */
884 tmp_reg = ((reload_in_progress || reload_completed)
885 ? reg : gen_reg_rtx (Pmode));
887 emit_move_insn (tmp_reg,
888 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
889 gen_rtx_HIGH (word_mode, orig)));
891 = gen_rtx_MEM (Pmode,
892 gen_rtx_LO_SUM (Pmode, tmp_reg,
893 gen_rtx_UNSPEC (Pmode,
897 current_function_uses_pic_offset_table = 1;
898 MEM_NOTRAP_P (pic_ref) = 1;
899 RTX_UNCHANGING_P (pic_ref) = 1;
900 mark_reg_pointer (reg, BITS_PER_UNIT);
901 insn = emit_move_insn (reg, pic_ref);
903 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
904 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));
908 else if (GET_CODE (orig) == CONST)
912 if (GET_CODE (XEXP (orig, 0)) == PLUS
913 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
919 if (GET_CODE (XEXP (orig, 0)) == PLUS)
921 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
922 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
923 base == reg ? 0 : reg);
928 if (GET_CODE (orig) == CONST_INT)
930 if (INT_14_BITS (orig))
931 return plus_constant (base, INTVAL (orig));
932 orig = force_reg (Pmode, orig);
934 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
935 /* Likewise, should we set special REG_NOTEs here? */
941 /* Try machine-dependent ways of modifying an illegitimate address
942 to be legitimate. If we find one, return the new, valid address.
943 This macro is used in only one place: `memory_address' in explow.c.
945 OLDX is the address as it was before break_out_memory_refs was called.
946 In some cases it is useful to look at this to decide what needs to be done.
948 MODE and WIN are passed so that this macro can use
949 GO_IF_LEGITIMATE_ADDRESS.
951 It is always safe for this macro to do nothing. It exists to recognize
952 opportunities to optimize the output.
954 For the PA, transform:
956 memory(X + <large int>)
960 if (<large int> & mask) >= 16
961 Y = (<large int> & ~mask) + mask + 1 Round up.
963 Y = (<large int> & ~mask) Round down.
965 memory (Z + (<large int> - Y));
967 This is for CSE to find several similar references, and only use one Z.
969 X can either be a SYMBOL_REF or REG, but because combine can not
970 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
971 D will not fit in 14 bits.
973 MODE_FLOAT references allow displacements which fit in 5 bits, so use
976 MODE_INT references allow displacements which fit in 14 bits, so use
979 This relies on the fact that most mode MODE_FLOAT references will use FP
980 registers and most mode MODE_INT references will use integer registers.
981 (In the rare case of an FP register used in an integer MODE, we depend
982 on secondary reloads to clean things up.)
985 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
986 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
987 addressing modes to be used).
989 Put X and Z into registers. Then put the entire expression into
993 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
994 enum machine_mode mode)
998 /* We need to canonicalize the order of operands in unscaled indexed
999 addresses since the code that checks if an address is valid doesn't
1000 always try both orders. */
1001 if (!TARGET_NO_SPACE_REGS
1002 && GET_CODE (x) == PLUS
1003 && GET_MODE (x) == Pmode
1004 && REG_P (XEXP (x, 0))
1005 && REG_P (XEXP (x, 1))
1006 && REG_POINTER (XEXP (x, 0))
1007 && !REG_POINTER (XEXP (x, 1)))
1008 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1011 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1013 /* Strip off CONST. */
1014 if (GET_CODE (x) == CONST)
1017 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1018 That should always be safe. */
1019 if (GET_CODE (x) == PLUS
1020 && GET_CODE (XEXP (x, 0)) == REG
1021 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1023 rtx reg = force_reg (Pmode, XEXP (x, 1));
1024 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1027 /* Note we must reject symbols which represent function addresses
1028 since the assembler/linker can't handle arithmetic on plabels. */
1029 if (GET_CODE (x) == PLUS
1030 && GET_CODE (XEXP (x, 1)) == CONST_INT
1031 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1032 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1033 || GET_CODE (XEXP (x, 0)) == REG))
1035 rtx int_part, ptr_reg;
1037 int offset = INTVAL (XEXP (x, 1));
1040 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1041 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1043 /* Choose which way to round the offset. Round up if we
1044 are >= halfway to the next boundary. */
1045 if ((offset & mask) >= ((mask + 1) / 2))
1046 newoffset = (offset & ~ mask) + mask + 1;
1048 newoffset = (offset & ~ mask);
1050 /* If the newoffset will not fit in 14 bits (ldo), then
1051 handling this would take 4 or 5 instructions (2 to load
1052 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1053 add the new offset and the SYMBOL_REF.) Combine can
1054 not handle 4->2 or 5->2 combinations, so do not create
1056 if (! VAL_14_BITS_P (newoffset)
1057 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1059 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1062 gen_rtx_HIGH (Pmode, const_part));
1065 gen_rtx_LO_SUM (Pmode,
1066 tmp_reg, const_part));
1070 if (! VAL_14_BITS_P (newoffset))
1071 int_part = force_reg (Pmode, GEN_INT (newoffset));
1073 int_part = GEN_INT (newoffset);
1075 ptr_reg = force_reg (Pmode,
1076 gen_rtx_PLUS (Pmode,
1077 force_reg (Pmode, XEXP (x, 0)),
1080 return plus_constant (ptr_reg, offset - newoffset);
1083 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1085 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1086 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1087 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1088 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
1089 || GET_CODE (XEXP (x, 1)) == SUBREG)
1090 && GET_CODE (XEXP (x, 1)) != CONST)
1092 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1096 if (GET_CODE (reg1) != REG)
1097 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1099 reg2 = XEXP (XEXP (x, 0), 0);
1100 if (GET_CODE (reg2) != REG)
1101 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1103 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1104 gen_rtx_MULT (Pmode,
1110 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1112 Only do so for floating point modes since this is more speculative
1113 and we lose if it's an integer store. */
1114 if (GET_CODE (x) == PLUS
1115 && GET_CODE (XEXP (x, 0)) == PLUS
1116 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1117 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1118 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1119 && (mode == SFmode || mode == DFmode))
1122 /* First, try and figure out what to use as a base register. */
1123 rtx reg1, reg2, base, idx, orig_base;
1125 reg1 = XEXP (XEXP (x, 0), 1);
1130 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1131 then emit_move_sequence will turn on REG_POINTER so we'll know
1132 it's a base register below. */
1133 if (GET_CODE (reg1) != REG)
1134 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1136 if (GET_CODE (reg2) != REG)
1137 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1139 /* Figure out what the base and index are. */
1141 if (GET_CODE (reg1) == REG
1142 && REG_POINTER (reg1))
1145 orig_base = XEXP (XEXP (x, 0), 1);
1146 idx = gen_rtx_PLUS (Pmode,
1147 gen_rtx_MULT (Pmode,
1148 XEXP (XEXP (XEXP (x, 0), 0), 0),
1149 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1152 else if (GET_CODE (reg2) == REG
1153 && REG_POINTER (reg2))
1156 orig_base = XEXP (x, 1);
1163 /* If the index adds a large constant, try to scale the
1164 constant so that it can be loaded with only one insn. */
1165 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1166 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1167 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1168 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1170 /* Divide the CONST_INT by the scale factor, then add it to A. */
1171 int val = INTVAL (XEXP (idx, 1));
1173 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1174 reg1 = XEXP (XEXP (idx, 0), 0);
1175 if (GET_CODE (reg1) != REG)
1176 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1178 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1180 /* We can now generate a simple scaled indexed address. */
1183 (Pmode, gen_rtx_PLUS (Pmode,
1184 gen_rtx_MULT (Pmode, reg1,
1185 XEXP (XEXP (idx, 0), 1)),
1189 /* If B + C is still a valid base register, then add them. */
1190 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1191 && INTVAL (XEXP (idx, 1)) <= 4096
1192 && INTVAL (XEXP (idx, 1)) >= -4096)
1194 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1197 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1199 reg2 = XEXP (XEXP (idx, 0), 0);
1200 if (GET_CODE (reg2) != CONST_INT)
1201 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1203 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1204 gen_rtx_MULT (Pmode,
1210 /* Get the index into a register, then add the base + index and
1211 return a register holding the result. */
1213 /* First get A into a register. */
1214 reg1 = XEXP (XEXP (idx, 0), 0);
1215 if (GET_CODE (reg1) != REG)
1216 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1218 /* And get B into a register. */
1219 reg2 = XEXP (idx, 1);
1220 if (GET_CODE (reg2) != REG)
1221 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1223 reg1 = force_reg (Pmode,
1224 gen_rtx_PLUS (Pmode,
1225 gen_rtx_MULT (Pmode, reg1,
1226 XEXP (XEXP (idx, 0), 1)),
1229 /* Add the result to our base register and return. */
1230 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1234 /* Uh-oh. We might have an address for x[n-100000]. This needs
1235 special handling to avoid creating an indexed memory address
1236 with x-100000 as the base.
1238 If the constant part is small enough, then it's still safe because
1239 there is a guard page at the beginning and end of the data segment.
1241 Scaled references are common enough that we want to try and rearrange the
1242 terms so that we can use indexing for these addresses too. Only
1243 do the optimization for floatint point modes. */
1245 if (GET_CODE (x) == PLUS
1246 && symbolic_expression_p (XEXP (x, 1)))
1248 /* Ugly. We modify things here so that the address offset specified
1249 by the index expression is computed first, then added to x to form
1250 the entire address. */
1252 rtx regx1, regx2, regy1, regy2, y;
1254 /* Strip off any CONST. */
1256 if (GET_CODE (y) == CONST)
1259 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1261 /* See if this looks like
1262 (plus (mult (reg) (shadd_const))
1263 (const (plus (symbol_ref) (const_int))))
1265 Where const_int is small. In that case the const
1266 expression is a valid pointer for indexing.
1268 If const_int is big, but can be divided evenly by shadd_const
1269 and added to (reg). This allows more scaled indexed addresses. */
1270 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1271 && GET_CODE (XEXP (x, 0)) == MULT
1272 && GET_CODE (XEXP (y, 1)) == CONST_INT
1273 && INTVAL (XEXP (y, 1)) >= -4096
1274 && INTVAL (XEXP (y, 1)) <= 4095
1275 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1276 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1278 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1282 if (GET_CODE (reg1) != REG)
1283 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1285 reg2 = XEXP (XEXP (x, 0), 0);
1286 if (GET_CODE (reg2) != REG)
1287 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1289 return force_reg (Pmode,
1290 gen_rtx_PLUS (Pmode,
1291 gen_rtx_MULT (Pmode,
1296 else if ((mode == DFmode || mode == SFmode)
1297 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1298 && GET_CODE (XEXP (x, 0)) == MULT
1299 && GET_CODE (XEXP (y, 1)) == CONST_INT
1300 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1301 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1302 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1305 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1306 / INTVAL (XEXP (XEXP (x, 0), 1))));
1307 regx2 = XEXP (XEXP (x, 0), 0);
1308 if (GET_CODE (regx2) != REG)
1309 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1310 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1314 gen_rtx_PLUS (Pmode,
1315 gen_rtx_MULT (Pmode, regx2,
1316 XEXP (XEXP (x, 0), 1)),
1317 force_reg (Pmode, XEXP (y, 0))));
1319 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1320 && INTVAL (XEXP (y, 1)) >= -4096
1321 && INTVAL (XEXP (y, 1)) <= 4095)
1323 /* This is safe because of the guard page at the
1324 beginning and end of the data space. Just
1325 return the original address. */
1330 /* Doesn't look like one we can optimize. */
1331 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1332 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1333 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1334 regx1 = force_reg (Pmode,
1335 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1337 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1345 /* For the HPPA, REG and REG+CONST is cost 0
1346 and addresses involving symbolic constants are cost 2.
1348 PIC addresses are very expensive.
1350 It is no coincidence that this has the same structure
1351 as GO_IF_LEGITIMATE_ADDRESS. */
1354 hppa_address_cost (rtx X)
1356 switch (GET_CODE (X))
1369 /* Compute a (partial) cost for rtx X. Return true if the complete
1370 cost has been computed, and false if subexpressions should be
1371 scanned. In either case, *TOTAL contains the cost result. */
1374 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1379 if (INTVAL (x) == 0)
1381 else if (INT_14_BITS (x))
1398 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1399 && outer_code != SET)
1406 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1407 *total = COSTS_N_INSNS (3);
1408 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1409 *total = COSTS_N_INSNS (8);
1411 *total = COSTS_N_INSNS (20);
1415 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1417 *total = COSTS_N_INSNS (14);
1425 *total = COSTS_N_INSNS (60);
1428 case PLUS: /* this includes shNadd insns */
1430 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1431 *total = COSTS_N_INSNS (3);
1433 *total = COSTS_N_INSNS (1);
1439 *total = COSTS_N_INSNS (1);
1447 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1448 new rtx with the correct mode. */
1450 force_mode (enum machine_mode mode, rtx orig)
1452 if (mode == GET_MODE (orig))
1455 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1458 return gen_rtx_REG (mode, REGNO (orig));
1461 /* Emit insns to move operands[1] into operands[0].
1463 Return 1 if we have written out everything that needs to be done to
1464 do the move. Otherwise, return 0 and the caller will emit the move
1467 Note SCRATCH_REG may not be in the proper mode depending on how it
1468 will be used. This routine is responsible for creating a new copy
1469 of SCRATCH_REG in the proper mode. */
1472 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1474 register rtx operand0 = operands[0];
1475 register rtx operand1 = operands[1];
1478 /* We can only handle indexed addresses in the destination operand
1479 of floating point stores. Thus, we need to break out indexed
1480 addresses from the destination operand. */
1481 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1483 /* This is only safe up to the beginning of life analysis. */
1487 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1488 operand0 = replace_equiv_address (operand0, tem);
1491 /* On targets with non-equivalent space registers, break out unscaled
1492 indexed addresses from the source operand before the final CSE.
1493 We have to do this because the REG_POINTER flag is not correctly
1494 carried through various optimization passes and CSE may substitute
1495 a pseudo without the pointer set for one with the pointer set. As
1496 a result, we loose various opportunites to create insns with
1497 unscaled indexed addresses. */
1498 if (!TARGET_NO_SPACE_REGS
1499 && !cse_not_expected
1500 && GET_CODE (operand1) == MEM
1501 && GET_CODE (XEXP (operand1, 0)) == PLUS
1502 && REG_P (XEXP (XEXP (operand1, 0), 0))
1503 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1505 = replace_equiv_address (operand1,
1506 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1509 && reload_in_progress && GET_CODE (operand0) == REG
1510 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1511 operand0 = reg_equiv_mem[REGNO (operand0)];
1512 else if (scratch_reg
1513 && reload_in_progress && GET_CODE (operand0) == SUBREG
1514 && GET_CODE (SUBREG_REG (operand0)) == REG
1515 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1517 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1518 the code which tracks sets/uses for delete_output_reload. */
1519 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1520 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1521 SUBREG_BYTE (operand0));
1522 operand0 = alter_subreg (&temp);
1526 && reload_in_progress && GET_CODE (operand1) == REG
1527 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1528 operand1 = reg_equiv_mem[REGNO (operand1)];
1529 else if (scratch_reg
1530 && reload_in_progress && GET_CODE (operand1) == SUBREG
1531 && GET_CODE (SUBREG_REG (operand1)) == REG
1532 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1534 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1535 the code which tracks sets/uses for delete_output_reload. */
1536 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1537 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1538 SUBREG_BYTE (operand1));
1539 operand1 = alter_subreg (&temp);
1542 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1543 && ((tem = find_replacement (&XEXP (operand0, 0)))
1544 != XEXP (operand0, 0)))
1545 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1547 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1548 && ((tem = find_replacement (&XEXP (operand1, 0)))
1549 != XEXP (operand1, 0)))
1550 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1552 /* Handle secondary reloads for loads/stores of FP registers from
1553 REG+D addresses where D does not fit in 5 bits, including
1554 (subreg (mem (addr))) cases. */
1556 && fp_reg_operand (operand0, mode)
1557 && ((GET_CODE (operand1) == MEM
1558 && !memory_address_p (DFmode, XEXP (operand1, 0)))
1559 || ((GET_CODE (operand1) == SUBREG
1560 && GET_CODE (XEXP (operand1, 0)) == MEM
1561 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))))
1563 if (GET_CODE (operand1) == SUBREG)
1564 operand1 = XEXP (operand1, 0);
1566 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1567 it in WORD_MODE regardless of what mode it was originally given
1569 scratch_reg = force_mode (word_mode, scratch_reg);
1571 /* D might not fit in 14 bits either; for such cases load D into
1573 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1575 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1576 emit_move_insn (scratch_reg,
1577 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1579 XEXP (XEXP (operand1, 0), 0),
1583 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1584 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1585 gen_rtx_MEM (mode, scratch_reg)));
1588 else if (scratch_reg
1589 && fp_reg_operand (operand1, mode)
1590 && ((GET_CODE (operand0) == MEM
1591 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1592 || ((GET_CODE (operand0) == SUBREG)
1593 && GET_CODE (XEXP (operand0, 0)) == MEM
1594 && !memory_address_p (DFmode,
1595 XEXP (XEXP (operand0, 0), 0)))))
1597 if (GET_CODE (operand0) == SUBREG)
1598 operand0 = XEXP (operand0, 0);
1600 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1601 it in WORD_MODE regardless of what mode it was originally given
1603 scratch_reg = force_mode (word_mode, scratch_reg);
1605 /* D might not fit in 14 bits either; for such cases load D into
1607 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1609 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1610 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1613 XEXP (XEXP (operand0, 0),
1618 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1619 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1623 /* Handle secondary reloads for loads of FP registers from constant
1624 expressions by forcing the constant into memory.
1626 Use scratch_reg to hold the address of the memory location.
1628 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1629 NO_REGS when presented with a const_int and a register class
1630 containing only FP registers. Doing so unfortunately creates
1631 more problems than it solves. Fix this for 2.5. */
1632 else if (scratch_reg
1633 && CONSTANT_P (operand1)
1634 && fp_reg_operand (operand0, mode))
1638 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1639 it in WORD_MODE regardless of what mode it was originally given
1641 scratch_reg = force_mode (word_mode, scratch_reg);
1643 /* Force the constant into memory and put the address of the
1644 memory location into scratch_reg. */
1645 xoperands[0] = scratch_reg;
1646 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1647 emit_move_sequence (xoperands, Pmode, 0);
1649 /* Now load the destination register. */
1650 emit_insn (gen_rtx_SET (mode, operand0,
1651 gen_rtx_MEM (mode, scratch_reg)));
1654 /* Handle secondary reloads for SAR. These occur when trying to load
1655 the SAR from memory, FP register, or with a constant. */
1656 else if (scratch_reg
1657 && GET_CODE (operand0) == REG
1658 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1659 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1660 && (GET_CODE (operand1) == MEM
1661 || GET_CODE (operand1) == CONST_INT
1662 || (GET_CODE (operand1) == REG
1663 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1665 /* D might not fit in 14 bits either; for such cases load D into
1667 if (GET_CODE (operand1) == MEM
1668 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1670 /* We are reloading the address into the scratch register, so we
1671 want to make sure the scratch register is a full register. */
1672 scratch_reg = force_mode (word_mode, scratch_reg);
1674 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1675 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1678 XEXP (XEXP (operand1, 0),
1682 /* Now we are going to load the scratch register from memory,
1683 we want to load it in the same width as the original MEM,
1684 which must be the same as the width of the ultimate destination,
1686 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1688 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1693 /* We want to load the scratch register using the same mode as
1694 the ultimate destination. */
1695 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1697 emit_move_insn (scratch_reg, operand1);
1700 /* And emit the insn to set the ultimate destination. We know that
1701 the scratch register has the same mode as the destination at this
1703 emit_move_insn (operand0, scratch_reg);
1706 /* Handle the most common case: storing into a register. */
1707 else if (register_operand (operand0, mode))
1709 if (register_operand (operand1, mode)
1710 || (GET_CODE (operand1) == CONST_INT
1711 && cint_ok_for_move (INTVAL (operand1)))
1712 || (operand1 == CONST0_RTX (mode))
1713 || (GET_CODE (operand1) == HIGH
1714 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1715 /* Only `general_operands' can come here, so MEM is ok. */
1716 || GET_CODE (operand1) == MEM)
1718 /* Various sets are created during RTL generation which don't
1719 have the REG_POINTER flag correctly set. After the CSE pass,
1720 instruction recognition can fail if we don't consistently
1721 set this flag when performing register copies. This should
1722 also improve the opportunities for creating insns that use
1723 unscaled indexing. */
1724 if (REG_P (operand0) && REG_P (operand1))
1726 if (REG_POINTER (operand1)
1727 && !REG_POINTER (operand0)
1728 && !HARD_REGISTER_P (operand0))
1729 copy_reg_pointer (operand0, operand1);
1730 else if (REG_POINTER (operand0)
1731 && !REG_POINTER (operand1)
1732 && !HARD_REGISTER_P (operand1))
1733 copy_reg_pointer (operand1, operand0);
1736 /* When MEMs are broken out, the REG_POINTER flag doesn't
1737 get set. In some cases, we can set the REG_POINTER flag
1738 from the declaration for the MEM. */
1739 if (REG_P (operand0)
1740 && GET_CODE (operand1) == MEM
1741 && !REG_POINTER (operand0))
1743 tree decl = MEM_EXPR (operand1);
1745 /* Set the register pointer flag and register alignment
1746 if the declaration for this memory reference is a
1747 pointer type. Fortran indirect argument references
1750 && !(flag_argument_noalias > 1
1751 && TREE_CODE (decl) == INDIRECT_REF
1752 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1756 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1758 if (TREE_CODE (decl) == COMPONENT_REF)
1759 decl = TREE_OPERAND (decl, 1);
1761 type = TREE_TYPE (decl);
1762 if (TREE_CODE (type) == ARRAY_TYPE)
1763 type = get_inner_array_type (type);
1765 if (POINTER_TYPE_P (type))
1769 type = TREE_TYPE (type);
1770 /* Using TYPE_ALIGN_OK is rather conservative as
1771 only the ada frontend actually sets it. */
1772 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1774 mark_reg_pointer (operand0, align);
1779 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1783 else if (GET_CODE (operand0) == MEM)
1785 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1786 && !(reload_in_progress || reload_completed))
1788 rtx temp = gen_reg_rtx (DFmode);
1790 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1791 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1794 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1796 /* Run this case quickly. */
1797 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1800 if (! (reload_in_progress || reload_completed))
1802 operands[0] = validize_mem (operand0);
1803 operands[1] = operand1 = force_reg (mode, operand1);
1807 /* Simplify the source if we need to.
1808 Note we do have to handle function labels here, even though we do
1809 not consider them legitimate constants. Loop optimizations can
1810 call the emit_move_xxx with one as a source. */
1811 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1812 || function_label_operand (operand1, mode)
1813 || (GET_CODE (operand1) == HIGH
1814 && symbolic_operand (XEXP (operand1, 0), mode)))
1818 if (GET_CODE (operand1) == HIGH)
1821 operand1 = XEXP (operand1, 0);
1823 if (symbolic_operand (operand1, mode))
1825 /* Argh. The assembler and linker can't handle arithmetic
1828 So we force the plabel into memory, load operand0 from
1829 the memory location, then add in the constant part. */
1830 if ((GET_CODE (operand1) == CONST
1831 && GET_CODE (XEXP (operand1, 0)) == PLUS
1832 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1833 || function_label_operand (operand1, mode))
1835 rtx temp, const_part;
1837 /* Figure out what (if any) scratch register to use. */
1838 if (reload_in_progress || reload_completed)
1840 scratch_reg = scratch_reg ? scratch_reg : operand0;
1841 /* SCRATCH_REG will hold an address and maybe the actual
1842 data. We want it in WORD_MODE regardless of what mode it
1843 was originally given to us. */
1844 scratch_reg = force_mode (word_mode, scratch_reg);
1847 scratch_reg = gen_reg_rtx (Pmode);
1849 if (GET_CODE (operand1) == CONST)
1851 /* Save away the constant part of the expression. */
1852 const_part = XEXP (XEXP (operand1, 0), 1);
1853 if (GET_CODE (const_part) != CONST_INT)
1856 /* Force the function label into memory. */
1857 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1861 /* No constant part. */
1862 const_part = NULL_RTX;
1864 /* Force the function label into memory. */
1865 temp = force_const_mem (mode, operand1);
1869 /* Get the address of the memory location. PIC-ify it if
1871 temp = XEXP (temp, 0);
1873 temp = legitimize_pic_address (temp, mode, scratch_reg);
1875 /* Put the address of the memory location into our destination
1878 emit_move_sequence (operands, mode, scratch_reg);
1880 /* Now load from the memory location into our destination
1882 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1883 emit_move_sequence (operands, mode, scratch_reg);
1885 /* And add back in the constant part. */
1886 if (const_part != NULL_RTX)
1887 expand_inc (operand0, const_part);
1896 if (reload_in_progress || reload_completed)
1898 temp = scratch_reg ? scratch_reg : operand0;
1899 /* TEMP will hold an address and maybe the actual
1900 data. We want it in WORD_MODE regardless of what mode it
1901 was originally given to us. */
1902 temp = force_mode (word_mode, temp);
1905 temp = gen_reg_rtx (Pmode);
1907 /* (const (plus (symbol) (const_int))) must be forced to
1908 memory during/after reload if the const_int will not fit
1910 if (GET_CODE (operand1) == CONST
1911 && GET_CODE (XEXP (operand1, 0)) == PLUS
1912 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1913 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1914 && (reload_completed || reload_in_progress)
1917 operands[1] = force_const_mem (mode, operand1);
1918 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1920 emit_move_sequence (operands, mode, temp);
1924 operands[1] = legitimize_pic_address (operand1, mode, temp);
1925 if (REG_P (operand0) && REG_P (operands[1]))
1926 copy_reg_pointer (operand0, operands[1]);
1927 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1930 /* On the HPPA, references to data space are supposed to use dp,
1931 register 27, but showing it in the RTL inhibits various cse
1932 and loop optimizations. */
1937 if (reload_in_progress || reload_completed)
1939 temp = scratch_reg ? scratch_reg : operand0;
1940 /* TEMP will hold an address and maybe the actual
1941 data. We want it in WORD_MODE regardless of what mode it
1942 was originally given to us. */
1943 temp = force_mode (word_mode, temp);
1946 temp = gen_reg_rtx (mode);
1948 /* Loading a SYMBOL_REF into a register makes that register
1949 safe to be used as the base in an indexed address.
1951 Don't mark hard registers though. That loses. */
1952 if (GET_CODE (operand0) == REG
1953 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1954 mark_reg_pointer (operand0, BITS_PER_UNIT);
1955 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1956 mark_reg_pointer (temp, BITS_PER_UNIT);
1959 set = gen_rtx_SET (mode, operand0, temp);
1961 set = gen_rtx_SET (VOIDmode,
1963 gen_rtx_LO_SUM (mode, temp, operand1));
1965 emit_insn (gen_rtx_SET (VOIDmode,
1967 gen_rtx_HIGH (mode, operand1)));
1973 else if (GET_CODE (operand1) != CONST_INT
1974 || !cint_ok_for_move (INTVAL (operand1)))
1978 HOST_WIDE_INT value = INTVAL (operand1);
1979 HOST_WIDE_INT insv = 0;
1983 && GET_CODE (operand1) == CONST_INT
1984 && HOST_BITS_PER_WIDE_INT > 32
1985 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1989 /* Extract the low order 32 bits of the value and sign extend.
1990 If the new value is the same as the original value, we can
1991 can use the original value as-is. If the new value is
1992 different, we use it and insert the most-significant 32-bits
1993 of the original value into the final result. */
1994 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1995 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1998 #if HOST_BITS_PER_WIDE_INT > 32
1999 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2003 operand1 = GEN_INT (nval);
2007 if (reload_in_progress || reload_completed)
2008 temp = scratch_reg ? scratch_reg : operand0;
2010 temp = gen_reg_rtx (mode);
2012 /* We don't directly split DImode constants on 32-bit targets
2013 because PLUS uses an 11-bit immediate and the insn sequence
2014 generated is not as efficient as the one using HIGH/LO_SUM. */
2015 if (GET_CODE (operand1) == CONST_INT
2016 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2019 /* Directly break constant into high and low parts. This
2020 provides better optimization opportunities because various
2021 passes recognize constants split with PLUS but not LO_SUM.
2022 We use a 14-bit signed low part except when the addition
2023 of 0x4000 to the high part might change the sign of the
2025 HOST_WIDE_INT low = value & 0x3fff;
2026 HOST_WIDE_INT high = value & ~ 0x3fff;
2030 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2038 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2039 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2043 emit_insn (gen_rtx_SET (VOIDmode, temp,
2044 gen_rtx_HIGH (mode, operand1)));
2045 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2048 insn = emit_move_insn (operands[0], operands[1]);
2050 /* Now insert the most significant 32 bits of the value
2051 into the register. When we don't have a second register
2052 available, it could take up to nine instructions to load
2053 a 64-bit integer constant. Prior to reload, we force
2054 constants that would take more than three instructions
2055 to load to the constant pool. During and after reload,
2056 we have to handle all possible values. */
2059 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2060 register and the value to be inserted is outside the
2061 range that can be loaded with three depdi instructions. */
2062 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2064 operand1 = GEN_INT (insv);
2066 emit_insn (gen_rtx_SET (VOIDmode, temp,
2067 gen_rtx_HIGH (mode, operand1)));
2068 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2069 emit_insn (gen_insv (operand0, GEN_INT (32),
2074 int len = 5, pos = 27;
2076 /* Insert the bits using the depdi instruction. */
2079 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2080 HOST_WIDE_INT sign = v5 < 0;
2082 /* Left extend the insertion. */
2083 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2084 while (pos > 0 && (insv & 1) == sign)
2086 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2091 emit_insn (gen_insv (operand0, GEN_INT (len),
2092 GEN_INT (pos), GEN_INT (v5)));
2094 len = pos > 0 && pos < 5 ? pos : 5;
2101 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
2106 /* Now have insn-emit do whatever it normally does. */
2110 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2111 it will need a link/runtime reloc). */
2114 reloc_needed (tree exp)
2118 switch (TREE_CODE (exp))
2125 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2126 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2131 case NON_LVALUE_EXPR:
2132 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2138 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
2139 if (TREE_VALUE (link) != 0)
2140 reloc |= reloc_needed (TREE_VALUE (link));
2153 /* Does operand (which is a symbolic_operand) live in text space?
2154 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2158 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2160 if (GET_CODE (operand) == CONST)
2161 operand = XEXP (XEXP (operand, 0), 0);
2164 if (GET_CODE (operand) == SYMBOL_REF)
2165 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2169 if (GET_CODE (operand) == SYMBOL_REF)
2170 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2176 /* Return the best assembler insn template
2177 for moving operands[1] into operands[0] as a fullword. */
2179 singlemove_string (rtx *operands)
2181 HOST_WIDE_INT intval;
2183 if (GET_CODE (operands[0]) == MEM)
2184 return "stw %r1,%0";
2185 if (GET_CODE (operands[1]) == MEM)
2187 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2192 if (GET_MODE (operands[1]) != SFmode)
2195 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2197 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2198 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2200 operands[1] = GEN_INT (i);
2201 /* Fall through to CONST_INT case. */
2203 if (GET_CODE (operands[1]) == CONST_INT)
2205 intval = INTVAL (operands[1]);
2207 if (VAL_14_BITS_P (intval))
2209 else if ((intval & 0x7ff) == 0)
2210 return "ldil L'%1,%0";
2211 else if (zdepi_cint_p (intval))
2212 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2214 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2216 return "copy %1,%0";
2220 /* Compute position (in OP[1]) and width (in OP[2])
2221 useful for copying IMM to a register using the zdepi
2222 instructions. Store the immediate value to insert in OP[0]. */
2224 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2228 /* Find the least significant set bit in IMM. */
2229 for (lsb = 0; lsb < 32; lsb++)
2236 /* Choose variants based on *sign* of the 5-bit field. */
2237 if ((imm & 0x10) == 0)
2238 len = (lsb <= 28) ? 4 : 32 - lsb;
2241 /* Find the width of the bitstring in IMM. */
2242 for (len = 5; len < 32; len++)
2244 if ((imm & (1 << len)) == 0)
2248 /* Sign extend IMM as a 5-bit value. */
2249 imm = (imm & 0xf) - 0x10;
2257 /* Compute position (in OP[1]) and width (in OP[2])
2258 useful for copying IMM to a register using the depdi,z
2259 instructions. Store the immediate value to insert in OP[0]. */
2261 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2263 HOST_WIDE_INT lsb, len;
2265 /* Find the least significant set bit in IMM. */
2266 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2273 /* Choose variants based on *sign* of the 5-bit field. */
2274 if ((imm & 0x10) == 0)
2275 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2276 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2279 /* Find the width of the bitstring in IMM. */
2280 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2282 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2286 /* Sign extend IMM as a 5-bit value. */
2287 imm = (imm & 0xf) - 0x10;
2295 /* Output assembler code to perform a doubleword move insn
2296 with operands OPERANDS. */
2299 output_move_double (rtx *operands)
2301 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2303 rtx addreg0 = 0, addreg1 = 0;
2305 /* First classify both operands. */
2307 if (REG_P (operands[0]))
2309 else if (offsettable_memref_p (operands[0]))
2311 else if (GET_CODE (operands[0]) == MEM)
2316 if (REG_P (operands[1]))
2318 else if (CONSTANT_P (operands[1]))
2320 else if (offsettable_memref_p (operands[1]))
2322 else if (GET_CODE (operands[1]) == MEM)
2327 /* Check for the cases that the operand constraints are not
2328 supposed to allow to happen. Abort if we get one,
2329 because generating code for these cases is painful. */
2331 if (optype0 != REGOP && optype1 != REGOP)
2334 /* Handle auto decrementing and incrementing loads and stores
2335 specifically, since the structure of the function doesn't work
2336 for them without major modification. Do it better when we learn
2337 this port about the general inc/dec addressing of PA.
2338 (This was written by tege. Chide him if it doesn't work.) */
2340 if (optype0 == MEMOP)
2342 /* We have to output the address syntax ourselves, since print_operand
2343 doesn't deal with the addresses we want to use. Fix this later. */
2345 rtx addr = XEXP (operands[0], 0);
2346 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2348 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2350 operands[0] = XEXP (addr, 0);
2351 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2354 if (!reg_overlap_mentioned_p (high_reg, addr))
2356 /* No overlap between high target register and address
2357 register. (We do this in a non-obvious way to
2358 save a register file writeback) */
2359 if (GET_CODE (addr) == POST_INC)
2360 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2361 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2366 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2368 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2370 operands[0] = XEXP (addr, 0);
2371 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2374 if (!reg_overlap_mentioned_p (high_reg, addr))
2376 /* No overlap between high target register and address
2377 register. (We do this in a non-obvious way to
2378 save a register file writeback) */
2379 if (GET_CODE (addr) == PRE_INC)
2380 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2381 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2387 if (optype1 == MEMOP)
2389 /* We have to output the address syntax ourselves, since print_operand
2390 doesn't deal with the addresses we want to use. Fix this later. */
2392 rtx addr = XEXP (operands[1], 0);
2393 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2395 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2397 operands[1] = XEXP (addr, 0);
2398 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2401 if (!reg_overlap_mentioned_p (high_reg, addr))
2403 /* No overlap between high target register and address
2404 register. (We do this in a non-obvious way to
2405 save a register file writeback) */
2406 if (GET_CODE (addr) == POST_INC)
2407 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2408 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2412 /* This is an undefined situation. We should load into the
2413 address register *and* update that register. Probably
2414 we don't need to handle this at all. */
2415 if (GET_CODE (addr) == POST_INC)
2416 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2417 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2420 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2422 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2424 operands[1] = XEXP (addr, 0);
2425 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2428 if (!reg_overlap_mentioned_p (high_reg, addr))
2430 /* No overlap between high target register and address
2431 register. (We do this in a non-obvious way to
2432 save a register file writeback) */
2433 if (GET_CODE (addr) == PRE_INC)
2434 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2435 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2439 /* This is an undefined situation. We should load into the
2440 address register *and* update that register. Probably
2441 we don't need to handle this at all. */
2442 if (GET_CODE (addr) == PRE_INC)
2443 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2444 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2447 else if (GET_CODE (addr) == PLUS
2448 && GET_CODE (XEXP (addr, 0)) == MULT)
2450 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2452 if (!reg_overlap_mentioned_p (high_reg, addr))
2456 xoperands[0] = high_reg;
2457 xoperands[1] = XEXP (addr, 1);
2458 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2459 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2460 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2462 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2468 xoperands[0] = high_reg;
2469 xoperands[1] = XEXP (addr, 1);
2470 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2471 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2472 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2474 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2479 /* If an operand is an unoffsettable memory ref, find a register
2480 we can increment temporarily to make it refer to the second word. */
2482 if (optype0 == MEMOP)
2483 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2485 if (optype1 == MEMOP)
2486 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2488 /* Ok, we can do one word at a time.
2489 Normally we do the low-numbered word first.
2491 In either case, set up in LATEHALF the operands to use
2492 for the high-numbered word and in some cases alter the
2493 operands in OPERANDS to be suitable for the low-numbered word. */
2495 if (optype0 == REGOP)
2496 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2497 else if (optype0 == OFFSOP)
2498 latehalf[0] = adjust_address (operands[0], SImode, 4);
2500 latehalf[0] = operands[0];
2502 if (optype1 == REGOP)
2503 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2504 else if (optype1 == OFFSOP)
2505 latehalf[1] = adjust_address (operands[1], SImode, 4);
2506 else if (optype1 == CNSTOP)
2507 split_double (operands[1], &operands[1], &latehalf[1]);
2509 latehalf[1] = operands[1];
2511 /* If the first move would clobber the source of the second one,
2512 do them in the other order.
2514 This can happen in two cases:
2516 mem -> register where the first half of the destination register
2517 is the same register used in the memory's address. Reload
2518 can create such insns.
2520 mem in this case will be either register indirect or register
2521 indirect plus a valid offset.
2523 register -> register move where REGNO(dst) == REGNO(src + 1)
2524 someone (Tim/Tege?) claimed this can happen for parameter loads.
2526 Handle mem -> register case first. */
2527 if (optype0 == REGOP
2528 && (optype1 == MEMOP || optype1 == OFFSOP)
2529 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2532 /* Do the late half first. */
2534 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2535 output_asm_insn (singlemove_string (latehalf), latehalf);
2539 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2540 return singlemove_string (operands);
2543 /* Now handle register -> register case. */
2544 if (optype0 == REGOP && optype1 == REGOP
2545 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2547 output_asm_insn (singlemove_string (latehalf), latehalf);
2548 return singlemove_string (operands);
2551 /* Normal case: do the two words, low-numbered first. */
2553 output_asm_insn (singlemove_string (operands), operands);
2555 /* Make any unoffsettable addresses point at high-numbered word. */
2557 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2559 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2562 output_asm_insn (singlemove_string (latehalf), latehalf);
2564 /* Undo the adds we just did. */
2566 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2568 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2574 output_fp_move_double (rtx *operands)
2576 if (FP_REG_P (operands[0]))
2578 if (FP_REG_P (operands[1])
2579 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2580 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2582 output_asm_insn ("fldd%F1 %1,%0", operands);
2584 else if (FP_REG_P (operands[1]))
2586 output_asm_insn ("fstd%F0 %1,%0", operands);
2588 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2590 if (GET_CODE (operands[0]) == REG)
2593 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2594 xoperands[0] = operands[0];
2595 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2597 /* This is a pain. You have to be prepared to deal with an
2598 arbitrary address here including pre/post increment/decrement.
2600 so avoid this in the MD. */
2608 /* Return a REG that occurs in ADDR with coefficient 1.
2609 ADDR can be effectively incremented by incrementing REG. */
2612 find_addr_reg (rtx addr)
2614 while (GET_CODE (addr) == PLUS)
2616 if (GET_CODE (XEXP (addr, 0)) == REG)
2617 addr = XEXP (addr, 0);
2618 else if (GET_CODE (XEXP (addr, 1)) == REG)
2619 addr = XEXP (addr, 1);
2620 else if (CONSTANT_P (XEXP (addr, 0)))
2621 addr = XEXP (addr, 1);
2622 else if (CONSTANT_P (XEXP (addr, 1)))
2623 addr = XEXP (addr, 0);
2627 if (GET_CODE (addr) == REG)
2632 /* Emit code to perform a block move.
2634 OPERANDS[0] is the destination pointer as a REG, clobbered.
2635 OPERANDS[1] is the source pointer as a REG, clobbered.
2636 OPERANDS[2] is a register for temporary storage.
2637 OPERANDS[3] is a register for temporary storage.
2638 OPERANDS[4] is the size as a CONST_INT
2639 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2640 OPERANDS[6] is another temporary register. */
2643 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2645 int align = INTVAL (operands[5]);
2646 unsigned long n_bytes = INTVAL (operands[4]);
2648 /* We can't move more than a word at a time because the PA
2649 has no longer integer move insns. (Could use fp mem ops?) */
2650 if (align > (TARGET_64BIT ? 8 : 4))
2651 align = (TARGET_64BIT ? 8 : 4);
2653 /* Note that we know each loop below will execute at least twice
2654 (else we would have open-coded the copy). */
2658 /* Pre-adjust the loop counter. */
2659 operands[4] = GEN_INT (n_bytes - 16);
2660 output_asm_insn ("ldi %4,%2", operands);
2663 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2664 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2665 output_asm_insn ("std,ma %3,8(%0)", operands);
2666 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2667 output_asm_insn ("std,ma %6,8(%0)", operands);
2669 /* Handle the residual. There could be up to 7 bytes of
2670 residual to copy! */
2671 if (n_bytes % 16 != 0)
2673 operands[4] = GEN_INT (n_bytes % 8);
2674 if (n_bytes % 16 >= 8)
2675 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2676 if (n_bytes % 8 != 0)
2677 output_asm_insn ("ldd 0(%1),%6", operands);
2678 if (n_bytes % 16 >= 8)
2679 output_asm_insn ("std,ma %3,8(%0)", operands);
2680 if (n_bytes % 8 != 0)
2681 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2686 /* Pre-adjust the loop counter. */
2687 operands[4] = GEN_INT (n_bytes - 8);
2688 output_asm_insn ("ldi %4,%2", operands);
2691 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2692 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2693 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2694 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2695 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2697 /* Handle the residual. There could be up to 7 bytes of
2698 residual to copy! */
2699 if (n_bytes % 8 != 0)
2701 operands[4] = GEN_INT (n_bytes % 4);
2702 if (n_bytes % 8 >= 4)
2703 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2704 if (n_bytes % 4 != 0)
2705 output_asm_insn ("ldw 0(%1),%6", operands);
2706 if (n_bytes % 8 >= 4)
2707 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2708 if (n_bytes % 4 != 0)
2709 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2714 /* Pre-adjust the loop counter. */
2715 operands[4] = GEN_INT (n_bytes - 4);
2716 output_asm_insn ("ldi %4,%2", operands);
2719 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2720 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2721 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2722 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2723 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2725 /* Handle the residual. */
2726 if (n_bytes % 4 != 0)
2728 if (n_bytes % 4 >= 2)
2729 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2730 if (n_bytes % 2 != 0)
2731 output_asm_insn ("ldb 0(%1),%6", operands);
2732 if (n_bytes % 4 >= 2)
2733 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2734 if (n_bytes % 2 != 0)
2735 output_asm_insn ("stb %6,0(%0)", operands);
2740 /* Pre-adjust the loop counter. */
2741 operands[4] = GEN_INT (n_bytes - 2);
2742 output_asm_insn ("ldi %4,%2", operands);
2745 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2746 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2747 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2748 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2749 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2751 /* Handle the residual. */
2752 if (n_bytes % 2 != 0)
2754 output_asm_insn ("ldb 0(%1),%3", operands);
2755 output_asm_insn ("stb %3,0(%0)", operands);
2764 /* Count the number of insns necessary to handle this block move.
2766 Basic structure is the same as emit_block_move, except that we
2767 count insns rather than emit them. */
2770 compute_movstr_length (rtx insn)
2772 rtx pat = PATTERN (insn);
2773 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2774 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2775 unsigned int n_insns = 0;
2777 /* We can't move more than four bytes at a time because the PA
2778 has no longer integer move insns. (Could use fp mem ops?) */
2779 if (align > (TARGET_64BIT ? 8 : 4))
2780 align = (TARGET_64BIT ? 8 : 4);
2782 /* The basic copying loop. */
2786 if (n_bytes % (2 * align) != 0)
2788 if ((n_bytes % (2 * align)) >= align)
2791 if ((n_bytes % align) != 0)
2795 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2799 /* Emit code to perform a block clear.
2801 OPERANDS[0] is the destination pointer as a REG, clobbered.
2802 OPERANDS[1] is a register for temporary storage.
2803 OPERANDS[2] is the size as a CONST_INT
2804 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2807 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2809 int align = INTVAL (operands[3]);
2810 unsigned long n_bytes = INTVAL (operands[2]);
2812 /* We can't clear more than a word at a time because the PA
2813 has no longer integer move insns. */
2814 if (align > (TARGET_64BIT ? 8 : 4))
2815 align = (TARGET_64BIT ? 8 : 4);
2817 /* Note that we know each loop below will execute at least twice
2818 (else we would have open-coded the copy). */
2822 /* Pre-adjust the loop counter. */
2823 operands[2] = GEN_INT (n_bytes - 16);
2824 output_asm_insn ("ldi %2,%1", operands);
2827 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2828 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2829 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2831 /* Handle the residual. There could be up to 7 bytes of
2832 residual to copy! */
2833 if (n_bytes % 16 != 0)
2835 operands[2] = GEN_INT (n_bytes % 8);
2836 if (n_bytes % 16 >= 8)
2837 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2838 if (n_bytes % 8 != 0)
2839 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2844 /* Pre-adjust the loop counter. */
2845 operands[2] = GEN_INT (n_bytes - 8);
2846 output_asm_insn ("ldi %2,%1", operands);
2849 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2850 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2851 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2853 /* Handle the residual. There could be up to 7 bytes of
2854 residual to copy! */
2855 if (n_bytes % 8 != 0)
2857 operands[2] = GEN_INT (n_bytes % 4);
2858 if (n_bytes % 8 >= 4)
2859 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2860 if (n_bytes % 4 != 0)
2861 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2866 /* Pre-adjust the loop counter. */
2867 operands[2] = GEN_INT (n_bytes - 4);
2868 output_asm_insn ("ldi %2,%1", operands);
2871 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2872 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2873 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2875 /* Handle the residual. */
2876 if (n_bytes % 4 != 0)
2878 if (n_bytes % 4 >= 2)
2879 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2880 if (n_bytes % 2 != 0)
2881 output_asm_insn ("stb %%r0,0(%0)", operands);
2886 /* Pre-adjust the loop counter. */
2887 operands[2] = GEN_INT (n_bytes - 2);
2888 output_asm_insn ("ldi %2,%1", operands);
2891 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2892 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2893 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2895 /* Handle the residual. */
2896 if (n_bytes % 2 != 0)
2897 output_asm_insn ("stb %%r0,0(%0)", operands);
2906 /* Count the number of insns necessary to handle this block move.
2908 Basic structure is the same as emit_block_move, except that we
2909 count insns rather than emit them. */
2912 compute_clrstr_length (rtx insn)
2914 rtx pat = PATTERN (insn);
2915 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2916 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2917 unsigned int n_insns = 0;
2919 /* We can't clear more than a word at a time because the PA
2920 has no longer integer move insns. */
2921 if (align > (TARGET_64BIT ? 8 : 4))
2922 align = (TARGET_64BIT ? 8 : 4);
2924 /* The basic loop. */
2928 if (n_bytes % (2 * align) != 0)
2930 if ((n_bytes % (2 * align)) >= align)
2933 if ((n_bytes % align) != 0)
2937 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2943 output_and (rtx *operands)
2945 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2947 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2948 int ls0, ls1, ms0, p, len;
2950 for (ls0 = 0; ls0 < 32; ls0++)
2951 if ((mask & (1 << ls0)) == 0)
2954 for (ls1 = ls0; ls1 < 32; ls1++)
2955 if ((mask & (1 << ls1)) != 0)
2958 for (ms0 = ls1; ms0 < 32; ms0++)
2959 if ((mask & (1 << ms0)) == 0)
2972 operands[2] = GEN_INT (len);
2973 return "{extru|extrw,u} %1,31,%2,%0";
2977 /* We could use this `depi' for the case above as well, but `depi'
2978 requires one more register file access than an `extru'. */
2983 operands[2] = GEN_INT (p);
2984 operands[3] = GEN_INT (len);
2985 return "{depi|depwi} 0,%2,%3,%0";
2989 return "and %1,%2,%0";
2992 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2993 storing the result in operands[0]. */
2995 output_64bit_and (rtx *operands)
2997 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2999 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3000 int ls0, ls1, ms0, p, len;
3002 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3003 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3006 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3007 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3010 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3011 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3014 if (ms0 != HOST_BITS_PER_WIDE_INT)
3017 if (ls1 == HOST_BITS_PER_WIDE_INT)
3024 operands[2] = GEN_INT (len);
3025 return "extrd,u %1,63,%2,%0";
3029 /* We could use this `depi' for the case above as well, but `depi'
3030 requires one more register file access than an `extru'. */
3035 operands[2] = GEN_INT (p);
3036 operands[3] = GEN_INT (len);
3037 return "depdi 0,%2,%3,%0";
3041 return "and %1,%2,%0";
3045 output_ior (rtx *operands)
3047 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3048 int bs0, bs1, p, len;
3050 if (INTVAL (operands[2]) == 0)
3051 return "copy %1,%0";
3053 for (bs0 = 0; bs0 < 32; bs0++)
3054 if ((mask & (1 << bs0)) != 0)
3057 for (bs1 = bs0; bs1 < 32; bs1++)
3058 if ((mask & (1 << bs1)) == 0)
3061 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3067 operands[2] = GEN_INT (p);
3068 operands[3] = GEN_INT (len);
3069 return "{depi|depwi} -1,%2,%3,%0";
3072 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3073 storing the result in operands[0]. */
3075 output_64bit_ior (rtx *operands)
3077 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3078 int bs0, bs1, p, len;
3080 if (INTVAL (operands[2]) == 0)
3081 return "copy %1,%0";
3083 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3084 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3087 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3088 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3091 if (bs1 != HOST_BITS_PER_WIDE_INT
3092 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3098 operands[2] = GEN_INT (p);
3099 operands[3] = GEN_INT (len);
3100 return "depdi -1,%2,%3,%0";
3103 /* Target hook for assembling integer objects. This code handles
3104 aligned SI and DI integers specially, since function references must
3105 be preceded by P%. */
3108 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3110 if (size == UNITS_PER_WORD && aligned_p
3111 && function_label_operand (x, VOIDmode))
3113 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3114 output_addr_const (asm_out_file, x);
3115 fputc ('\n', asm_out_file);
3118 return default_assemble_integer (x, size, aligned_p);
3121 /* Output an ascii string. */
3123 output_ascii (FILE *file, const char *p, int size)
3127 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3129 /* The HP assembler can only take strings of 256 characters at one
3130 time. This is a limitation on input line length, *not* the
3131 length of the string. Sigh. Even worse, it seems that the
3132 restriction is in number of input characters (see \xnn &
3133 \whatever). So we have to do this very carefully. */
3135 fputs ("\t.STRING \"", file);
3138 for (i = 0; i < size; i += 4)
3142 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3144 register unsigned int c = (unsigned char) p[i + io];
3146 if (c == '\"' || c == '\\')
3147 partial_output[co++] = '\\';
3148 if (c >= ' ' && c < 0177)
3149 partial_output[co++] = c;
3153 partial_output[co++] = '\\';
3154 partial_output[co++] = 'x';
3155 hexd = c / 16 - 0 + '0';
3157 hexd -= '9' - 'a' + 1;
3158 partial_output[co++] = hexd;
3159 hexd = c % 16 - 0 + '0';
3161 hexd -= '9' - 'a' + 1;
3162 partial_output[co++] = hexd;
3165 if (chars_output + co > 243)
3167 fputs ("\"\n\t.STRING \"", file);
3170 fwrite (partial_output, 1, (size_t) co, file);
3174 fputs ("\"\n", file);
3177 /* Try to rewrite floating point comparisons & branches to avoid
3178 useless add,tr insns.
3180 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3181 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3182 first attempt to remove useless add,tr insns. It is zero
3183 for the second pass as reorg sometimes leaves bogus REG_DEAD
3186 When CHECK_NOTES is zero we can only eliminate add,tr insns
3187 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3190 remove_useless_addtr_insns (int check_notes)
3193 static int pass = 0;
3195 /* This is fairly cheap, so always run it when optimizing. */
3199 int fbranch_count = 0;
3201 /* Walk all the insns in this function looking for fcmp & fbranch
3202 instructions. Keep track of how many of each we find. */
3203 for (insn = get_insns (); insn; insn = next_insn (insn))
3207 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3208 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3211 tmp = PATTERN (insn);
3213 /* It must be a set. */
3214 if (GET_CODE (tmp) != SET)
3217 /* If the destination is CCFP, then we've found an fcmp insn. */
3218 tmp = SET_DEST (tmp);
3219 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3225 tmp = PATTERN (insn);
3226 /* If this is an fbranch instruction, bump the fbranch counter. */
3227 if (GET_CODE (tmp) == SET
3228 && SET_DEST (tmp) == pc_rtx
3229 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3230 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3231 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3232 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3240 /* Find all floating point compare + branch insns. If possible,
3241 reverse the comparison & the branch to avoid add,tr insns. */
3242 for (insn = get_insns (); insn; insn = next_insn (insn))
3246 /* Ignore anything that isn't an INSN. */
3247 if (GET_CODE (insn) != INSN)
3250 tmp = PATTERN (insn);
3252 /* It must be a set. */
3253 if (GET_CODE (tmp) != SET)
3256 /* The destination must be CCFP, which is register zero. */
3257 tmp = SET_DEST (tmp);
3258 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3261 /* INSN should be a set of CCFP.
3263 See if the result of this insn is used in a reversed FP
3264 conditional branch. If so, reverse our condition and
3265 the branch. Doing so avoids useless add,tr insns. */
3266 next = next_insn (insn);
3269 /* Jumps, calls and labels stop our search. */
3270 if (GET_CODE (next) == JUMP_INSN
3271 || GET_CODE (next) == CALL_INSN
3272 || GET_CODE (next) == CODE_LABEL)
3275 /* As does another fcmp insn. */
3276 if (GET_CODE (next) == INSN
3277 && GET_CODE (PATTERN (next)) == SET
3278 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3279 && REGNO (SET_DEST (PATTERN (next))) == 0)
3282 next = next_insn (next);
3285 /* Is NEXT_INSN a branch? */
3287 && GET_CODE (next) == JUMP_INSN)
3289 rtx pattern = PATTERN (next);
3291 /* If it a reversed fp conditional branch (eg uses add,tr)
3292 and CCFP dies, then reverse our conditional and the branch
3293 to avoid the add,tr. */
3294 if (GET_CODE (pattern) == SET
3295 && SET_DEST (pattern) == pc_rtx
3296 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3297 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3298 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3299 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3300 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3301 && (fcmp_count == fbranch_count
3303 && find_regno_note (next, REG_DEAD, 0))))
3305 /* Reverse the branch. */
3306 tmp = XEXP (SET_SRC (pattern), 1);
3307 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3308 XEXP (SET_SRC (pattern), 2) = tmp;
3309 INSN_CODE (next) = -1;
3311 /* Reverse our condition. */
3312 tmp = PATTERN (insn);
3313 PUT_CODE (XEXP (tmp, 1),
3314 (reverse_condition_maybe_unordered
3315 (GET_CODE (XEXP (tmp, 1)))));
3325 /* You may have trouble believing this, but this is the 32 bit HP-PA
3330 Variable arguments (optional; any number may be allocated)
3332 SP-(4*(N+9)) arg word N
3337 Fixed arguments (must be allocated; may remain unused)
3346 SP-32 External Data Pointer (DP)
3348 SP-24 External/stub RP (RP')
3352 SP-8 Calling Stub RP (RP'')
3357 SP-0 Stack Pointer (points to next available address)
3361 /* This function saves registers as follows. Registers marked with ' are
3362 this function's registers (as opposed to the previous function's).
3363 If a frame_pointer isn't needed, r4 is saved as a general register;
3364 the space for the frame pointer is still allocated, though, to keep
3370 SP (FP') Previous FP
3371 SP + 4 Alignment filler (sigh)
3372 SP + 8 Space for locals reserved here.
3376 SP + n All call saved register used.
3380 SP + o All call saved fp registers used.
3384 SP + p (SP') points to next available address.
3388 /* Global variables set by output_function_prologue(). */
3389 /* Size of frame. Need to know this to emit return insns from
3391 static HOST_WIDE_INT actual_fsize, local_fsize;
3392 static int save_fregs;
3394 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3395 Handle case where DISP > 8k by using the add_high_const patterns.
3397 Note in DISP > 8k case, we will leave the high part of the address
3398 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3401 store_reg (int reg, HOST_WIDE_INT disp, int base)
3403 rtx insn, dest, src, basereg;
3405 src = gen_rtx_REG (word_mode, reg);
3406 basereg = gen_rtx_REG (Pmode, base);
3407 if (VAL_14_BITS_P (disp))
3409 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3410 insn = emit_move_insn (dest, src);
3412 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3414 rtx delta = GEN_INT (disp);
3415 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3417 emit_move_insn (tmpreg, delta);
3418 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3419 dest = gen_rtx_MEM (word_mode, tmpreg);
3420 insn = emit_move_insn (dest, src);
3424 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3425 gen_rtx_SET (VOIDmode,
3426 gen_rtx_MEM (word_mode,
3427 gen_rtx_PLUS (word_mode, basereg,
3435 rtx delta = GEN_INT (disp);
3436 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3437 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3439 emit_move_insn (tmpreg, high);
3440 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3441 insn = emit_move_insn (dest, src);
3445 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3446 gen_rtx_SET (VOIDmode,
3447 gen_rtx_MEM (word_mode,
3448 gen_rtx_PLUS (word_mode, basereg,
3456 RTX_FRAME_RELATED_P (insn) = 1;
3459 /* Emit RTL to store REG at the memory location specified by BASE and then
3460 add MOD to BASE. MOD must be <= 8k. */
3463 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3465 rtx insn, basereg, srcreg, delta;
3467 if (!VAL_14_BITS_P (mod))
3470 basereg = gen_rtx_REG (Pmode, base);
3471 srcreg = gen_rtx_REG (word_mode, reg);
3472 delta = GEN_INT (mod);
3474 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3477 RTX_FRAME_RELATED_P (insn) = 1;
3479 /* RTX_FRAME_RELATED_P must be set on each frame related set
3480 in a parallel with more than one element. Don't set
3481 RTX_FRAME_RELATED_P in the first set if reg is temporary
3482 register 1. The effect of this operation is recorded in
3483 the initial copy. */
3486 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3487 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3491 /* The first element of a PARALLEL is always processed if it is
3492 a SET. Thus, we need an expression list for this case. */
3494 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3495 gen_rtx_SET (VOIDmode, basereg,
3496 gen_rtx_PLUS (word_mode, basereg, delta)),
3502 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3503 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3504 whether to add a frame note or not.
3506 In the DISP > 8k case, we leave the high part of the address in %r1.
3507 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3510 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3514 if (VAL_14_BITS_P (disp))
3516 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3517 plus_constant (gen_rtx_REG (Pmode, base), disp));
3519 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3521 rtx basereg = gen_rtx_REG (Pmode, base);
3522 rtx delta = GEN_INT (disp);
3523 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3525 emit_move_insn (tmpreg, delta);
3526 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3527 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3531 rtx basereg = gen_rtx_REG (Pmode, base);
3532 rtx delta = GEN_INT (disp);
3533 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3535 emit_move_insn (tmpreg,
3536 gen_rtx_PLUS (Pmode, basereg,
3537 gen_rtx_HIGH (Pmode, delta)));
3538 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3539 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3542 if (DO_FRAME_NOTES && note)
3543 RTX_FRAME_RELATED_P (insn) = 1;
3547 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3552 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3553 be consistent with the rounding and size calculation done here.
3554 Change them at the same time. */
3556 /* We do our own stack alignment. First, round the size of the
3557 stack locals up to a word boundary. */
3558 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3560 /* Space for previous frame pointer + filler. If any frame is
3561 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3562 waste some space here for the sake of HP compatibility. The
3563 first slot is only used when the frame pointer is needed. */
3564 if (size || frame_pointer_needed)
3565 size += STARTING_FRAME_OFFSET;
3567 /* If the current function calls __builtin_eh_return, then we need
3568 to allocate stack space for registers that will hold data for
3569 the exception handler. */
3570 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3574 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3576 size += i * UNITS_PER_WORD;
3579 /* Account for space used by the callee general register saves. */
3580 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3581 if (regs_ever_live[i])
3582 size += UNITS_PER_WORD;
3584 /* Account for space used by the callee floating point register saves. */
3585 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3586 if (regs_ever_live[i]
3587 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3591 /* We always save both halves of the FP register, so always
3592 increment the frame size by 8 bytes. */
3596 /* If any of the floating registers are saved, account for the
3597 alignment needed for the floating point register save block. */
3600 size = (size + 7) & ~7;
3605 /* The various ABIs include space for the outgoing parameters in the
3606 size of the current function's stack frame. We don't need to align
3607 for the outgoing arguments as their alignment is set by the final
3608 rounding for the frame as a whole. */
3609 size += current_function_outgoing_args_size;
3611 /* Allocate space for the fixed frame marker. This space must be
3612 allocated for any function that makes calls or allocates
3614 if (!current_function_is_leaf || size)
3615 size += TARGET_64BIT ? 48 : 32;
3617 /* Finally, round to the preferred stack boundary. */
3618 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3619 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3622 /* Generate the assembly code for function entry. FILE is a stdio
3623 stream to output the code to. SIZE is an int: how many units of
3624 temporary storage to allocate.
3626 Refer to the array `regs_ever_live' to determine which registers to
3627 save; `regs_ever_live[I]' is nonzero if register number I is ever
3628 used in the function. This function is responsible for knowing
3629 which registers should not be saved even if used. */
3631 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3632 of memory. If any fpu reg is used in the function, we allocate
3633 such a block here, at the bottom of the frame, just in case it's needed.
3635 If this function is a leaf procedure, then we may choose not
3636 to do a "save" insn. The decision about whether or not
3637 to do this is made in regclass.c. */
3640 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3642 /* The function's label and associated .PROC must never be
3643 separated and must be output *after* any profiling declarations
3644 to avoid changing spaces/subspaces within a procedure. */
3645 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3646 fputs ("\t.PROC\n", file);
3648 /* hppa_expand_prologue does the dirty work now. We just need
3649 to output the assembler directives which denote the start
3651 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3652 if (regs_ever_live[2])
3653 fputs (",CALLS,SAVE_RP", file);
3655 fputs (",NO_CALLS", file);
3657 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3658 at the beginning of the frame and that it is used as the frame
3659 pointer for the frame. We do this because our current frame
3660 layout doesn't conform to that specified in the the HP runtime
3661 documentation and we need a way to indicate to programs such as
3662 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3663 isn't used by HP compilers but is supported by the assembler.
3664 However, SAVE_SP is supposed to indicate that the previous stack
3665 pointer has been saved in the frame marker. */
3666 if (frame_pointer_needed)
3667 fputs (",SAVE_SP", file);
3669 /* Pass on information about the number of callee register saves
3670 performed in the prologue.
3672 The compiler is supposed to pass the highest register number
3673 saved, the assembler then has to adjust that number before
3674 entering it into the unwind descriptor (to account for any
3675 caller saved registers with lower register numbers than the
3676 first callee saved register). */
3678 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3681 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3683 fputs ("\n\t.ENTRY\n", file);
3685 remove_useless_addtr_insns (0);
3689 hppa_expand_prologue (void)
3691 int merge_sp_adjust_with_store = 0;
3692 HOST_WIDE_INT size = get_frame_size ();
3693 HOST_WIDE_INT offset;
3701 /* Compute total size for frame pointer, filler, locals and rounding to
3702 the next word boundary. Similar code appears in compute_frame_size
3703 and must be changed in tandem with this code. */
3704 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3705 if (local_fsize || frame_pointer_needed)
3706 local_fsize += STARTING_FRAME_OFFSET;
3708 actual_fsize = compute_frame_size (size, &save_fregs);
3710 /* Compute a few things we will use often. */
3711 tmpreg = gen_rtx_REG (word_mode, 1);
3713 /* Save RP first. The calling conventions manual states RP will
3714 always be stored into the caller's frame at sp - 20 or sp - 16
3715 depending on which ABI is in use. */
3716 if (regs_ever_live[2] || current_function_calls_eh_return)
3717 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3719 /* Allocate the local frame and set up the frame pointer if needed. */
3720 if (actual_fsize != 0)
3722 if (frame_pointer_needed)
3724 /* Copy the old frame pointer temporarily into %r1. Set up the
3725 new stack pointer, then store away the saved old frame pointer
3726 into the stack at sp and at the same time update the stack
3727 pointer by actual_fsize bytes. Two versions, first
3728 handles small (<8k) frames. The second handles large (>=8k)
3730 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3733 /* We need to record the frame pointer save here since the
3734 new frame pointer is set in the following insn. */
3735 RTX_FRAME_RELATED_P (insn) = 1;
3737 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3738 gen_rtx_SET (VOIDmode,
3739 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3744 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3746 RTX_FRAME_RELATED_P (insn) = 1;
3748 if (VAL_14_BITS_P (actual_fsize))
3749 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3752 /* It is incorrect to store the saved frame pointer at *sp,
3753 then increment sp (writes beyond the current stack boundary).
3755 So instead use stwm to store at *sp and post-increment the
3756 stack pointer as an atomic operation. Then increment sp to
3757 finish allocating the new frame. */
3758 HOST_WIDE_INT adjust1 = 8192 - 64;
3759 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3761 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3762 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3766 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3767 we need to store the previous stack pointer (frame pointer)
3768 into the frame marker on targets that use the HP unwind
3769 library. This allows the HP unwind library to be used to
3770 unwind GCC frames. However, we are not fully compatible
3771 with the HP library because our frame layout differs from
3772 that specified in the HP runtime specification.
3774 We don't want a frame note on this instruction as the frame
3775 marker moves during dynamic stack allocation.
3777 This instruction also serves as a blockage to prevent
3778 register spills from being scheduled before the stack
3779 pointer is raised. This is necessary as we store
3780 registers using the frame pointer as a base register,
3781 and the frame pointer is set before sp is raised. */
3782 if (TARGET_HPUX_UNWIND_LIBRARY)
3784 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3785 GEN_INT (TARGET_64BIT ? -8 : -4));
3787 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3791 emit_insn (gen_blockage ());
3793 /* no frame pointer needed. */
3796 /* In some cases we can perform the first callee register save
3797 and allocating the stack frame at the same time. If so, just
3798 make a note of it and defer allocating the frame until saving
3799 the callee registers. */
3800 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3801 merge_sp_adjust_with_store = 1;
3802 /* Can not optimize. Adjust the stack frame by actual_fsize
3805 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3810 /* Normal register save.
3812 Do not save the frame pointer in the frame_pointer_needed case. It
3813 was done earlier. */
3814 if (frame_pointer_needed)
3816 offset = local_fsize;
3818 /* Saving the EH return data registers in the frame is the simplest
3819 way to get the frame unwind information emitted. We put them
3820 just before the general registers. */
3821 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3823 unsigned int i, regno;
3827 regno = EH_RETURN_DATA_REGNO (i);
3828 if (regno == INVALID_REGNUM)
3831 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3832 offset += UNITS_PER_WORD;
3836 for (i = 18; i >= 4; i--)
3837 if (regs_ever_live[i] && ! call_used_regs[i])
3839 store_reg (i, offset, FRAME_POINTER_REGNUM);
3840 offset += UNITS_PER_WORD;
3843 /* Account for %r3 which is saved in a special place. */
3846 /* No frame pointer needed. */
3849 offset = local_fsize - actual_fsize;
3851 /* Saving the EH return data registers in the frame is the simplest
3852 way to get the frame unwind information emitted. */
3853 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3855 unsigned int i, regno;
3859 regno = EH_RETURN_DATA_REGNO (i);
3860 if (regno == INVALID_REGNUM)
3863 /* If merge_sp_adjust_with_store is nonzero, then we can
3864 optimize the first save. */
3865 if (merge_sp_adjust_with_store)
3867 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3868 merge_sp_adjust_with_store = 0;
3871 store_reg (regno, offset, STACK_POINTER_REGNUM);
3872 offset += UNITS_PER_WORD;
3876 for (i = 18; i >= 3; i--)
3877 if (regs_ever_live[i] && ! call_used_regs[i])
3879 /* If merge_sp_adjust_with_store is nonzero, then we can
3880 optimize the first GR save. */
3881 if (merge_sp_adjust_with_store)
3883 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3884 merge_sp_adjust_with_store = 0;
3887 store_reg (i, offset, STACK_POINTER_REGNUM);
3888 offset += UNITS_PER_WORD;
3892 /* If we wanted to merge the SP adjustment with a GR save, but we never
3893 did any GR saves, then just emit the adjustment here. */
3894 if (merge_sp_adjust_with_store)
3895 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3899 /* The hppa calling conventions say that %r19, the pic offset
3900 register, is saved at sp - 32 (in this function's frame)
3901 when generating PIC code. FIXME: What is the correct thing
3902 to do for functions which make no calls and allocate no
3903 frame? Do we need to allocate a frame, or can we just omit
3904 the save? For now we'll just omit the save.
3906 We don't want a note on this insn as the frame marker can
3907 move if there is a dynamic stack allocation. */
3908 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3910 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3912 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3916 /* Align pointer properly (doubleword boundary). */
3917 offset = (offset + 7) & ~7;
3919 /* Floating point register store. */
3924 /* First get the frame or stack pointer to the start of the FP register
3926 if (frame_pointer_needed)
3928 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3929 base = frame_pointer_rtx;
3933 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3934 base = stack_pointer_rtx;
3937 /* Now actually save the FP registers. */
3938 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3940 if (regs_ever_live[i]
3941 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3943 rtx addr, insn, reg;
3944 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3945 reg = gen_rtx_REG (DFmode, i);
3946 insn = emit_move_insn (addr, reg);
3949 RTX_FRAME_RELATED_P (insn) = 1;
3952 rtx mem = gen_rtx_MEM (DFmode,
3953 plus_constant (base, offset));
3955 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3956 gen_rtx_SET (VOIDmode, mem, reg),
3961 rtx meml = gen_rtx_MEM (SFmode,
3962 plus_constant (base, offset));
3963 rtx memr = gen_rtx_MEM (SFmode,
3964 plus_constant (base, offset + 4));
3965 rtx regl = gen_rtx_REG (SFmode, i);
3966 rtx regr = gen_rtx_REG (SFmode, i + 1);
3967 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3968 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3971 RTX_FRAME_RELATED_P (setl) = 1;
3972 RTX_FRAME_RELATED_P (setr) = 1;
3973 vec = gen_rtvec (2, setl, setr);
3975 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3976 gen_rtx_SEQUENCE (VOIDmode, vec),
3980 offset += GET_MODE_SIZE (DFmode);
3987 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3988 Handle case where DISP > 8k by using the add_high_const patterns. */
3991 load_reg (int reg, HOST_WIDE_INT disp, int base)
3993 rtx dest = gen_rtx_REG (word_mode, reg);
3994 rtx basereg = gen_rtx_REG (Pmode, base);
3997 if (VAL_14_BITS_P (disp))
3998 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3999 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4001 rtx delta = GEN_INT (disp);
4002 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4004 emit_move_insn (tmpreg, delta);
4005 if (TARGET_DISABLE_INDEXING)
4007 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4008 src = gen_rtx_MEM (word_mode, tmpreg);
4011 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4015 rtx delta = GEN_INT (disp);
4016 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4017 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4019 emit_move_insn (tmpreg, high);
4020 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4023 emit_move_insn (dest, src);
4026 /* Update the total code bytes output to the text section. */
4029 update_total_code_bytes (int nbytes)
4031 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4032 && !IN_NAMED_SECTION_P (cfun->decl))
4034 if (INSN_ADDRESSES_SET_P ())
4036 unsigned long old_total = total_code_bytes;
4038 total_code_bytes += nbytes;
4040 /* Be prepared to handle overflows. */
4041 if (old_total > total_code_bytes)
4042 total_code_bytes = -1;
4045 total_code_bytes = -1;
4049 /* This function generates the assembly code for function exit.
4050 Args are as for output_function_prologue ().
4052 The function epilogue should not depend on the current stack
4053 pointer! It should use the frame pointer only. This is mandatory
4054 because of alloca; we also take advantage of it to omit stack
4055 adjustments before returning. */
4058 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4060 rtx insn = get_last_insn ();
4064 /* hppa_expand_epilogue does the dirty work now. We just need
4065 to output the assembler directives which denote the end
4068 To make debuggers happy, emit a nop if the epilogue was completely
4069 eliminated due to a volatile call as the last insn in the
4070 current function. That way the return address (in %r2) will
4071 always point to a valid instruction in the current function. */
4073 /* Get the last real insn. */
4074 if (GET_CODE (insn) == NOTE)
4075 insn = prev_real_insn (insn);
4077 /* If it is a sequence, then look inside. */
4078 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4079 insn = XVECEXP (PATTERN (insn), 0, 0);
4081 /* If insn is a CALL_INSN, then it must be a call to a volatile
4082 function (otherwise there would be epilogue insns). */
4083 if (insn && GET_CODE (insn) == CALL_INSN)
4085 fputs ("\tnop\n", file);
4089 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4091 if (INSN_ADDRESSES_SET_P ())
4093 insn = get_last_nonnote_insn ();
4094 last_address += INSN_ADDRESSES (INSN_UID (insn));
4096 last_address += insn_default_length (insn);
4097 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4098 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4101 /* Finally, update the total number of code bytes output so far. */
4102 update_total_code_bytes (last_address);
4106 hppa_expand_epilogue (void)
4109 HOST_WIDE_INT offset;
4110 HOST_WIDE_INT ret_off = 0;
4112 int merge_sp_adjust_with_load = 0;
4114 /* We will use this often. */
4115 tmpreg = gen_rtx_REG (word_mode, 1);
4117 /* Try to restore RP early to avoid load/use interlocks when
4118 RP gets used in the return (bv) instruction. This appears to still
4119 be necessary even when we schedule the prologue and epilogue. */
4120 if (regs_ever_live [2] || current_function_calls_eh_return)
4122 ret_off = TARGET_64BIT ? -16 : -20;
4123 if (frame_pointer_needed)
4125 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4130 /* No frame pointer, and stack is smaller than 8k. */
4131 if (VAL_14_BITS_P (ret_off - actual_fsize))
4133 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4139 /* General register restores. */
4140 if (frame_pointer_needed)
4142 offset = local_fsize;
4144 /* If the current function calls __builtin_eh_return, then we need
4145 to restore the saved EH data registers. */
4146 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4148 unsigned int i, regno;
4152 regno = EH_RETURN_DATA_REGNO (i);
4153 if (regno == INVALID_REGNUM)
4156 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4157 offset += UNITS_PER_WORD;
4161 for (i = 18; i >= 4; i--)
4162 if (regs_ever_live[i] && ! call_used_regs[i])
4164 load_reg (i, offset, FRAME_POINTER_REGNUM);
4165 offset += UNITS_PER_WORD;
4170 offset = local_fsize - actual_fsize;
4172 /* If the current function calls __builtin_eh_return, then we need
4173 to restore the saved EH data registers. */
4174 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4176 unsigned int i, regno;
4180 regno = EH_RETURN_DATA_REGNO (i);
4181 if (regno == INVALID_REGNUM)
4184 /* Only for the first load.
4185 merge_sp_adjust_with_load holds the register load
4186 with which we will merge the sp adjustment. */
4187 if (merge_sp_adjust_with_load == 0
4189 && VAL_14_BITS_P (-actual_fsize))
4190 merge_sp_adjust_with_load = regno;
4192 load_reg (regno, offset, STACK_POINTER_REGNUM);
4193 offset += UNITS_PER_WORD;
4197 for (i = 18; i >= 3; i--)
4199 if (regs_ever_live[i] && ! call_used_regs[i])
4201 /* Only for the first load.
4202 merge_sp_adjust_with_load holds the register load
4203 with which we will merge the sp adjustment. */
4204 if (merge_sp_adjust_with_load == 0
4206 && VAL_14_BITS_P (-actual_fsize))
4207 merge_sp_adjust_with_load = i;
4209 load_reg (i, offset, STACK_POINTER_REGNUM);
4210 offset += UNITS_PER_WORD;
4215 /* Align pointer properly (doubleword boundary). */
4216 offset = (offset + 7) & ~7;
4218 /* FP register restores. */
4221 /* Adjust the register to index off of. */
4222 if (frame_pointer_needed)
4223 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4225 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4227 /* Actually do the restores now. */
4228 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4229 if (regs_ever_live[i]
4230 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4232 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4233 rtx dest = gen_rtx_REG (DFmode, i);
4234 emit_move_insn (dest, src);
4238 /* Emit a blockage insn here to keep these insns from being moved to
4239 an earlier spot in the epilogue, or into the main instruction stream.
4241 This is necessary as we must not cut the stack back before all the
4242 restores are finished. */
4243 emit_insn (gen_blockage ());
4245 /* Reset stack pointer (and possibly frame pointer). The stack
4246 pointer is initially set to fp + 64 to avoid a race condition. */
4247 if (frame_pointer_needed)
4249 rtx delta = GEN_INT (-64);
4251 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4252 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4254 /* If we were deferring a callee register restore, do it now. */
4255 else if (merge_sp_adjust_with_load)
4257 rtx delta = GEN_INT (-actual_fsize);
4258 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4260 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4262 else if (actual_fsize != 0)
4263 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4266 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4267 frame greater than 8k), do so now. */
4269 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4271 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4273 rtx sa = EH_RETURN_STACKADJ_RTX;
4275 emit_insn (gen_blockage ());
4276 emit_insn (TARGET_64BIT
4277 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4278 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4283 hppa_pic_save_rtx (void)
4285 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4289 hppa_profile_hook (int label_no)
4291 /* We use SImode for the address of the function in both 32 and
4292 64-bit code to avoid having to provide DImode versions of the
4293 lcla2 and load_offset_label_address insn patterns. */
4294 rtx reg = gen_reg_rtx (SImode);
4295 rtx label_rtx = gen_label_rtx ();
4296 rtx begin_label_rtx, call_insn;
4297 char begin_label_name[16];
4299 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4301 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4304 emit_move_insn (arg_pointer_rtx,
4305 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4308 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4310 /* The address of the function is loaded into %r25 with a instruction-
4311 relative sequence that avoids the use of relocations. The sequence
4312 is split so that the load_offset_label_address instruction can
4313 occupy the delay slot of the call to _mcount. */
4315 emit_insn (gen_lcla2 (reg, label_rtx));
4317 emit_insn (gen_lcla1 (reg, label_rtx));
4319 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4320 reg, begin_label_rtx, label_rtx));
4322 #ifndef NO_PROFILE_COUNTERS
4324 rtx count_label_rtx, addr, r24;
4325 char count_label_name[16];
4327 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4328 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4330 addr = force_reg (Pmode, count_label_rtx);
4331 r24 = gen_rtx_REG (Pmode, 24);
4332 emit_move_insn (r24, addr);
4335 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4336 gen_rtx_SYMBOL_REF (Pmode,
4338 GEN_INT (TARGET_64BIT ? 24 : 12)));
4340 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4345 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4346 gen_rtx_SYMBOL_REF (Pmode,
4348 GEN_INT (TARGET_64BIT ? 16 : 8)));
4352 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4353 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4355 /* Indicate the _mcount call cannot throw, nor will it execute a
4357 REG_NOTES (call_insn)
4358 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4361 /* Fetch the return address for the frame COUNT steps up from
4362 the current frame, after the prologue. FRAMEADDR is the
4363 frame pointer of the COUNT frame.
4365 We want to ignore any export stub remnants here. To handle this,
4366 we examine the code at the return address, and if it is an export
4367 stub, we return a memory rtx for the stub return address stored
4370 The value returned is used in two different ways:
4372 1. To find a function's caller.
4374 2. To change the return address for a function.
4376 This function handles most instances of case 1; however, it will
4377 fail if there are two levels of stubs to execute on the return
4378 path. The only way I believe that can happen is if the return value
4379 needs a parameter relocation, which never happens for C code.
4381 This function handles most instances of case 2; however, it will
4382 fail if we did not originally have stub code on the return path
4383 but will need stub code on the new return path. This can happen if
4384 the caller & callee are both in the main program, but the new
4385 return location is in a shared library. */
4388 return_addr_rtx (int count, rtx frameaddr)
4398 rp = get_hard_reg_initial_val (Pmode, 2);
4400 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4403 saved_rp = gen_reg_rtx (Pmode);
4404 emit_move_insn (saved_rp, rp);
4406 /* Get pointer to the instruction stream. We have to mask out the
4407 privilege level from the two low order bits of the return address
4408 pointer here so that ins will point to the start of the first
4409 instruction that would have been executed if we returned. */
4410 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4411 label = gen_label_rtx ();
4413 /* Check the instruction stream at the normal return address for the
4416 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4417 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4418 0x00011820 | stub+16: mtsp r1,sr0
4419 0xe0400002 | stub+20: be,n 0(sr0,rp)
4421 If it is an export stub, than our return address is really in
4424 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4425 NULL_RTX, SImode, 1);
4426 emit_jump_insn (gen_bne (label));
4428 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4429 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4430 emit_jump_insn (gen_bne (label));
4432 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4433 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4434 emit_jump_insn (gen_bne (label));
4436 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4437 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4439 /* If there is no export stub then just use the value saved from
4440 the return pointer register. */
4442 emit_jump_insn (gen_bne (label));
4444 /* Here we know that our return address points to an export
4445 stub. We don't want to return the address of the export stub,
4446 but rather the return address of the export stub. That return
4447 address is stored at -24[frameaddr]. */
4449 emit_move_insn (saved_rp,
4451 memory_address (Pmode,
4452 plus_constant (frameaddr,
4459 /* This is only valid once reload has completed because it depends on
4460 knowing exactly how much (if any) frame there is and...
4462 It's only valid if there is no frame marker to de-allocate and...
4464 It's only valid if %r2 hasn't been saved into the caller's frame
4465 (we're not profiling and %r2 isn't live anywhere). */
4467 hppa_can_use_return_insn_p (void)
4469 return (reload_completed
4470 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4471 && ! regs_ever_live[2]
4472 && ! frame_pointer_needed);
4476 emit_bcond_fp (enum rtx_code code, rtx operand0)
4478 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4479 gen_rtx_IF_THEN_ELSE (VOIDmode,
4480 gen_rtx_fmt_ee (code,
4482 gen_rtx_REG (CCFPmode, 0),
4484 gen_rtx_LABEL_REF (VOIDmode, operand0),
4490 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4492 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4493 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4496 /* Adjust the cost of a scheduling dependency. Return the new cost of
4497 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4500 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4502 enum attr_type attr_type;
4504 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4505 true dependencies as they are described with bypasses now. */
4506 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4509 if (! recog_memoized (insn))
4512 attr_type = get_attr_type (insn);
4514 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4516 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4519 if (attr_type == TYPE_FPLOAD)
4521 rtx pat = PATTERN (insn);
4522 rtx dep_pat = PATTERN (dep_insn);
4523 if (GET_CODE (pat) == PARALLEL)
4525 /* This happens for the fldXs,mb patterns. */
4526 pat = XVECEXP (pat, 0, 0);
4528 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4529 /* If this happens, we have to extend this to schedule
4530 optimally. Return 0 for now. */
4533 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4535 if (! recog_memoized (dep_insn))
4537 switch (get_attr_type (dep_insn))
4544 case TYPE_FPSQRTSGL:
4545 case TYPE_FPSQRTDBL:
4546 /* A fpload can't be issued until one cycle before a
4547 preceding arithmetic operation has finished if
4548 the target of the fpload is any of the sources
4549 (or destination) of the arithmetic operation. */
4550 return insn_default_latency (dep_insn) - 1;
4557 else if (attr_type == TYPE_FPALU)
4559 rtx pat = PATTERN (insn);
4560 rtx dep_pat = PATTERN (dep_insn);
4561 if (GET_CODE (pat) == PARALLEL)
4563 /* This happens for the fldXs,mb patterns. */
4564 pat = XVECEXP (pat, 0, 0);
4566 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4567 /* If this happens, we have to extend this to schedule
4568 optimally. Return 0 for now. */
4571 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4573 if (! recog_memoized (dep_insn))
4575 switch (get_attr_type (dep_insn))
4579 case TYPE_FPSQRTSGL:
4580 case TYPE_FPSQRTDBL:
4581 /* An ALU flop can't be issued until two cycles before a
4582 preceding divide or sqrt operation has finished if
4583 the target of the ALU flop is any of the sources
4584 (or destination) of the divide or sqrt operation. */
4585 return insn_default_latency (dep_insn) - 2;
4593 /* For other anti dependencies, the cost is 0. */
4596 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4598 /* Output dependency; DEP_INSN writes a register that INSN writes some
4600 if (attr_type == TYPE_FPLOAD)
4602 rtx pat = PATTERN (insn);
4603 rtx dep_pat = PATTERN (dep_insn);
4604 if (GET_CODE (pat) == PARALLEL)
4606 /* This happens for the fldXs,mb patterns. */
4607 pat = XVECEXP (pat, 0, 0);
4609 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4610 /* If this happens, we have to extend this to schedule
4611 optimally. Return 0 for now. */
4614 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4616 if (! recog_memoized (dep_insn))
4618 switch (get_attr_type (dep_insn))
4625 case TYPE_FPSQRTSGL:
4626 case TYPE_FPSQRTDBL:
4627 /* A fpload can't be issued until one cycle before a
4628 preceding arithmetic operation has finished if
4629 the target of the fpload is the destination of the
4630 arithmetic operation.
4632 Exception: For PA7100LC, PA7200 and PA7300, the cost
4633 is 3 cycles, unless they bundle together. We also
4634 pay the penalty if the second insn is a fpload. */
4635 return insn_default_latency (dep_insn) - 1;
4642 else if (attr_type == TYPE_FPALU)
4644 rtx pat = PATTERN (insn);
4645 rtx dep_pat = PATTERN (dep_insn);
4646 if (GET_CODE (pat) == PARALLEL)
4648 /* This happens for the fldXs,mb patterns. */
4649 pat = XVECEXP (pat, 0, 0);
4651 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4652 /* If this happens, we have to extend this to schedule
4653 optimally. Return 0 for now. */
4656 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4658 if (! recog_memoized (dep_insn))
4660 switch (get_attr_type (dep_insn))
4664 case TYPE_FPSQRTSGL:
4665 case TYPE_FPSQRTDBL:
4666 /* An ALU flop can't be issued until two cycles before a
4667 preceding divide or sqrt operation has finished if
4668 the target of the ALU flop is also the target of
4669 the divide or sqrt operation. */
4670 return insn_default_latency (dep_insn) - 2;
4678 /* For other output dependencies, the cost is 0. */
4685 /* Adjust scheduling priorities. We use this to try and keep addil
4686 and the next use of %r1 close together. */
4688 pa_adjust_priority (rtx insn, int priority)
4690 rtx set = single_set (insn);
4694 src = SET_SRC (set);
4695 dest = SET_DEST (set);
4696 if (GET_CODE (src) == LO_SUM
4697 && symbolic_operand (XEXP (src, 1), VOIDmode)
4698 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4701 else if (GET_CODE (src) == MEM
4702 && GET_CODE (XEXP (src, 0)) == LO_SUM
4703 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4704 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4707 else if (GET_CODE (dest) == MEM
4708 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4709 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4710 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4716 /* The 700 can only issue a single insn at a time.
4717 The 7XXX processors can issue two insns at a time.
4718 The 8000 can issue 4 insns at a time. */
4720 pa_issue_rate (void)
4724 case PROCESSOR_700: return 1;
4725 case PROCESSOR_7100: return 2;
4726 case PROCESSOR_7100LC: return 2;
4727 case PROCESSOR_7200: return 2;
4728 case PROCESSOR_7300: return 2;
4729 case PROCESSOR_8000: return 4;
4738 /* Return any length adjustment needed by INSN which already has its length
4739 computed as LENGTH. Return zero if no adjustment is necessary.
4741 For the PA: function calls, millicode calls, and backwards short
4742 conditional branches with unfilled delay slots need an adjustment by +1
4743 (to account for the NOP which will be inserted into the instruction stream).
4745 Also compute the length of an inline block move here as it is too
4746 complicated to express as a length attribute in pa.md. */
4748 pa_adjust_insn_length (rtx insn, int length)
4750 rtx pat = PATTERN (insn);
4752 /* Jumps inside switch tables which have unfilled delay slots need
4754 if (GET_CODE (insn) == JUMP_INSN
4755 && GET_CODE (pat) == PARALLEL
4756 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4758 /* Millicode insn with an unfilled delay slot. */
4759 else if (GET_CODE (insn) == INSN
4760 && GET_CODE (pat) != SEQUENCE
4761 && GET_CODE (pat) != USE
4762 && GET_CODE (pat) != CLOBBER
4763 && get_attr_type (insn) == TYPE_MILLI)
4765 /* Block move pattern. */
4766 else if (GET_CODE (insn) == INSN
4767 && GET_CODE (pat) == PARALLEL
4768 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4769 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4770 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4771 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4772 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4773 return compute_movstr_length (insn) - 4;
4774 /* Block clear pattern. */
4775 else if (GET_CODE (insn) == INSN
4776 && GET_CODE (pat) == PARALLEL
4777 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4778 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4779 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4780 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4781 return compute_clrstr_length (insn) - 4;
4782 /* Conditional branch with an unfilled delay slot. */
4783 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4785 /* Adjust a short backwards conditional with an unfilled delay slot. */
4786 if (GET_CODE (pat) == SET
4788 && ! forward_branch_p (insn))
4790 else if (GET_CODE (pat) == PARALLEL
4791 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4794 /* Adjust dbra insn with short backwards conditional branch with
4795 unfilled delay slot -- only for case where counter is in a
4796 general register register. */
4797 else if (GET_CODE (pat) == PARALLEL
4798 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4799 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4800 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4802 && ! forward_branch_p (insn))
4810 /* Print operand X (an rtx) in assembler syntax to file FILE.
4811 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4812 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4815 print_operand (FILE *file, rtx x, int code)
4820 /* Output a 'nop' if there's nothing for the delay slot. */
4821 if (dbr_sequence_length () == 0)
4822 fputs ("\n\tnop", file);
4825 /* Output a nullification completer if there's nothing for the */
4826 /* delay slot or nullification is requested. */
4827 if (dbr_sequence_length () == 0 ||
4829 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4833 /* Print out the second register name of a register pair.
4834 I.e., R (6) => 7. */
4835 fputs (reg_names[REGNO (x) + 1], file);
4838 /* A register or zero. */
4840 || (x == CONST0_RTX (DFmode))
4841 || (x == CONST0_RTX (SFmode)))
4843 fputs ("%r0", file);
4849 /* A register or zero (floating point). */
4851 || (x == CONST0_RTX (DFmode))
4852 || (x == CONST0_RTX (SFmode)))
4854 fputs ("%fr0", file);
4863 xoperands[0] = XEXP (XEXP (x, 0), 0);
4864 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4865 output_global_address (file, xoperands[1], 0);
4866 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4870 case 'C': /* Plain (C)ondition */
4872 switch (GET_CODE (x))
4875 fputs ("=", file); break;
4877 fputs ("<>", file); break;
4879 fputs (">", file); break;
4881 fputs (">=", file); break;
4883 fputs (">>=", file); break;
4885 fputs (">>", file); break;
4887 fputs ("<", file); break;
4889 fputs ("<=", file); break;
4891 fputs ("<<=", file); break;
4893 fputs ("<<", file); break;
4898 case 'N': /* Condition, (N)egated */
4899 switch (GET_CODE (x))
4902 fputs ("<>", file); break;
4904 fputs ("=", file); break;
4906 fputs ("<=", file); break;
4908 fputs ("<", file); break;
4910 fputs ("<<", file); break;
4912 fputs ("<<=", file); break;
4914 fputs (">=", file); break;
4916 fputs (">", file); break;
4918 fputs (">>", file); break;
4920 fputs (">>=", file); break;
4925 /* For floating point comparisons. Note that the output
4926 predicates are the complement of the desired mode. */
4928 switch (GET_CODE (x))
4931 fputs ("!=", file); break;
4933 fputs ("=", file); break;
4935 fputs ("!>", file); break;
4937 fputs ("!>=", file); break;
4939 fputs ("!<", file); break;
4941 fputs ("!<=", file); break;
4943 fputs ("!<>", file); break;
4945 fputs (">", file); break;
4947 fputs (">=", file); break;
4949 fputs ("<", file); break;
4951 fputs ("<=", file); break;
4953 fputs ("<>", file); break;
4955 fputs ("<=>", file); break;
4957 fputs ("!<=>", file); break;
4962 case 'S': /* Condition, operands are (S)wapped. */
4963 switch (GET_CODE (x))
4966 fputs ("=", file); break;
4968 fputs ("<>", file); break;
4970 fputs ("<", file); break;
4972 fputs ("<=", file); break;
4974 fputs ("<<=", file); break;
4976 fputs ("<<", file); break;
4978 fputs (">", file); break;
4980 fputs (">=", file); break;
4982 fputs (">>=", file); break;
4984 fputs (">>", file); break;
4989 case 'B': /* Condition, (B)oth swapped and negate. */
4990 switch (GET_CODE (x))
4993 fputs ("<>", file); break;
4995 fputs ("=", file); break;
4997 fputs (">=", file); break;
4999 fputs (">", file); break;
5001 fputs (">>", file); break;
5003 fputs (">>=", file); break;
5005 fputs ("<=", file); break;
5007 fputs ("<", file); break;
5009 fputs ("<<", file); break;
5011 fputs ("<<=", file); break;
5017 if (GET_CODE (x) == CONST_INT)
5019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5024 if (GET_CODE (x) == CONST_INT)
5026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5031 if (GET_CODE (x) == CONST_INT)
5033 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5038 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
5040 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5045 if (GET_CODE (x) == CONST_INT)
5047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5052 if (GET_CODE (x) == CONST_INT)
5054 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5059 if (GET_CODE (x) == CONST_INT)
5064 switch (GET_CODE (XEXP (x, 0)))
5068 if (ASSEMBLER_DIALECT == 0)
5069 fputs ("s,mb", file);
5071 fputs (",mb", file);
5075 if (ASSEMBLER_DIALECT == 0)
5076 fputs ("s,ma", file);
5078 fputs (",ma", file);
5081 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5082 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5084 if (ASSEMBLER_DIALECT == 0)
5087 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5088 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5090 if (ASSEMBLER_DIALECT == 0)
5091 fputs ("x,s", file);
5095 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5099 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5105 output_global_address (file, x, 0);
5108 output_global_address (file, x, 1);
5110 case 0: /* Don't do anything special */
5115 compute_zdepwi_operands (INTVAL (x), op);
5116 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5122 compute_zdepdi_operands (INTVAL (x), op);
5123 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5127 /* We can get here from a .vtable_inherit due to our
5128 CONSTANT_ADDRESS_P rejecting perfectly good constant
5134 if (GET_CODE (x) == REG)
5136 fputs (reg_names [REGNO (x)], file);
5137 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5143 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5144 && (REGNO (x) & 1) == 0)
5147 else if (GET_CODE (x) == MEM)
5149 int size = GET_MODE_SIZE (GET_MODE (x));
5150 rtx base = NULL_RTX;
5151 switch (GET_CODE (XEXP (x, 0)))
5155 base = XEXP (XEXP (x, 0), 0);
5156 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5160 base = XEXP (XEXP (x, 0), 0);
5161 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5164 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5165 fprintf (file, "%s(%s)",
5166 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5167 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5168 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5169 fprintf (file, "%s(%s)",
5170 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5171 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5172 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5173 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5175 /* Because the REG_POINTER flag can get lost during reload,
5176 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5177 index and base registers in the combined move patterns. */
5178 rtx base = XEXP (XEXP (x, 0), 1);
5179 rtx index = XEXP (XEXP (x, 0), 0);
5181 fprintf (file, "%s(%s)",
5182 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5185 output_address (XEXP (x, 0));
5188 output_address (XEXP (x, 0));
5193 output_addr_const (file, x);
5196 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5199 output_global_address (FILE *file, rtx x, int round_constant)
5202 /* Imagine (high (const (plus ...))). */
5203 if (GET_CODE (x) == HIGH)
5206 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5207 assemble_name (file, XSTR (x, 0));
5208 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5210 assemble_name (file, XSTR (x, 0));
5211 fputs ("-$global$", file);
5213 else if (GET_CODE (x) == CONST)
5215 const char *sep = "";
5216 int offset = 0; /* assembler wants -$global$ at end */
5217 rtx base = NULL_RTX;
5219 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5221 base = XEXP (XEXP (x, 0), 0);
5222 output_addr_const (file, base);
5224 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
5225 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5228 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
5230 base = XEXP (XEXP (x, 0), 1);
5231 output_addr_const (file, base);
5233 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
5234 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5237 /* How bogus. The compiler is apparently responsible for
5238 rounding the constant if it uses an LR field selector.
5240 The linker and/or assembler seem a better place since
5241 they have to do this kind of thing already.
5243 If we fail to do this, HP's optimizing linker may eliminate
5244 an addil, but not update the ldw/stw/ldo instruction that
5245 uses the result of the addil. */
5247 offset = ((offset + 0x1000) & ~0x1fff);
5249 if (GET_CODE (XEXP (x, 0)) == PLUS)
5259 else if (GET_CODE (XEXP (x, 0)) == MINUS
5260 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5264 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5265 fputs ("-$global$", file);
5267 fprintf (file, "%s%d", sep, offset);
5270 output_addr_const (file, x);
5273 /* Output boilerplate text to appear at the beginning of the file.
5274 There are several possible versions. */
5275 #define aputs(x) fputs(x, asm_out_file)
5277 pa_file_start_level (void)
5280 aputs ("\t.LEVEL 2.0w\n");
5281 else if (TARGET_PA_20)
5282 aputs ("\t.LEVEL 2.0\n");
5283 else if (TARGET_PA_11)
5284 aputs ("\t.LEVEL 1.1\n");
5286 aputs ("\t.LEVEL 1.0\n");
5290 pa_file_start_space (int sortspace)
5292 aputs ("\t.SPACE $PRIVATE$");
5295 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5296 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5297 "\n\t.SPACE $TEXT$");
5300 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5301 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5305 pa_file_start_file (int want_version)
5307 if (write_symbols != NO_DEBUG)
5309 output_file_directive (asm_out_file, main_input_filename);
5311 aputs ("\t.version\t\"01.01\"\n");
5316 pa_file_start_mcount (const char *aswhat)
5319 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5323 pa_elf_file_start (void)
5325 pa_file_start_level ();
5326 pa_file_start_mcount ("ENTRY");
5327 pa_file_start_file (0);
5331 pa_som_file_start (void)
5333 pa_file_start_level ();
5334 pa_file_start_space (0);
5335 aputs ("\t.IMPORT $global$,DATA\n"
5336 "\t.IMPORT $$dyncall,MILLICODE\n");
5337 pa_file_start_mcount ("CODE");
5338 pa_file_start_file (0);
5342 pa_linux_file_start (void)
5344 pa_file_start_file (1);
5345 pa_file_start_level ();
5346 pa_file_start_mcount ("CODE");
5350 pa_hpux64_gas_file_start (void)
5352 pa_file_start_level ();
5353 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5355 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5357 pa_file_start_file (1);
5361 pa_hpux64_hpas_file_start (void)
5363 pa_file_start_level ();
5364 pa_file_start_space (1);
5365 pa_file_start_mcount ("CODE");
5366 pa_file_start_file (0);
5370 static struct deferred_plabel *
5371 get_plabel (const char *fname)
5375 /* See if we have already put this function on the list of deferred
5376 plabels. This list is generally small, so a liner search is not
5377 too ugly. If it proves too slow replace it with something faster. */
5378 for (i = 0; i < n_deferred_plabels; i++)
5379 if (strcmp (fname, deferred_plabels[i].name) == 0)
5382 /* If the deferred plabel list is empty, or this entry was not found
5383 on the list, create a new entry on the list. */
5384 if (deferred_plabels == NULL || i == n_deferred_plabels)
5386 const char *real_name;
5388 if (deferred_plabels == 0)
5389 deferred_plabels = (struct deferred_plabel *)
5390 ggc_alloc (sizeof (struct deferred_plabel));
5392 deferred_plabels = (struct deferred_plabel *)
5393 ggc_realloc (deferred_plabels,
5394 ((n_deferred_plabels + 1)
5395 * sizeof (struct deferred_plabel)));
5397 i = n_deferred_plabels++;
5398 deferred_plabels[i].internal_label = gen_label_rtx ();
5399 deferred_plabels[i].name = ggc_strdup (fname);
5401 /* Gross. We have just implicitly taken the address of this function,
5403 real_name = (*targetm.strip_name_encoding) (fname);
5404 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5407 return &deferred_plabels[i];
5411 output_deferred_plabels (void)
5414 /* If we have deferred plabels, then we need to switch into the data
5415 section and align it to a 4 byte boundary before we output the
5416 deferred plabels. */
5417 if (n_deferred_plabels)
5420 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5423 /* Now output the deferred plabels. */
5424 for (i = 0; i < n_deferred_plabels; i++)
5426 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5427 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5428 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5429 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5433 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5434 /* Initialize optabs to point to HPUX long double emulation routines. */
5436 pa_hpux_init_libfuncs (void)
5438 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5439 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5440 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5441 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5442 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5443 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5444 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5445 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5446 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5448 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5449 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5450 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5451 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5452 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5453 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5455 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5456 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5457 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5458 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5460 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5461 ? "__U_Qfcnvfxt_quad_to_sgl"
5462 : "_U_Qfcnvfxt_quad_to_sgl");
5463 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5464 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5465 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5467 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5468 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5472 /* HP's millicode routines mean something special to the assembler.
5473 Keep track of which ones we have used. */
5475 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5476 static void import_milli (enum millicodes);
5477 static char imported[(int) end1000];
5478 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5479 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5480 #define MILLI_START 10
5483 import_milli (enum millicodes code)
5485 char str[sizeof (import_string)];
5487 if (!imported[(int) code])
5489 imported[(int) code] = 1;
5490 strcpy (str, import_string);
5491 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5492 output_asm_insn (str, 0);
5496 /* The register constraints have put the operands and return value in
5497 the proper registers. */
5500 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5502 import_milli (mulI);
5503 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5506 /* Emit the rtl for doing a division by a constant. */
5508 /* Do magic division millicodes exist for this value? */
5509 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5512 /* We'll use an array to keep track of the magic millicodes and
5513 whether or not we've used them already. [n][0] is signed, [n][1] is
5516 static int div_milli[16][2];
5519 div_operand (rtx op, enum machine_mode mode)
5521 return (mode == SImode
5522 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5523 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5524 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5528 emit_hpdiv_const (rtx *operands, int unsignedp)
5530 if (GET_CODE (operands[2]) == CONST_INT
5531 && INTVAL (operands[2]) > 0
5532 && INTVAL (operands[2]) < 16
5533 && magic_milli[INTVAL (operands[2])])
5535 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5537 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5540 (PARALLEL, VOIDmode,
5541 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5542 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5544 gen_rtx_REG (SImode, 26),
5546 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5547 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5548 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5549 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5550 gen_rtx_CLOBBER (VOIDmode, ret))));
5551 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5558 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5562 /* If the divisor is a constant, try to use one of the special
5564 if (GET_CODE (operands[0]) == CONST_INT)
5566 static char buf[100];
5567 divisor = INTVAL (operands[0]);
5568 if (!div_milli[divisor][unsignedp])
5570 div_milli[divisor][unsignedp] = 1;
5572 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5574 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5578 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5579 INTVAL (operands[0]));
5580 return output_millicode_call (insn,
5581 gen_rtx_SYMBOL_REF (SImode, buf));
5585 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5586 INTVAL (operands[0]));
5587 return output_millicode_call (insn,
5588 gen_rtx_SYMBOL_REF (SImode, buf));
5591 /* Divisor isn't a special constant. */
5596 import_milli (divU);
5597 return output_millicode_call (insn,
5598 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5602 import_milli (divI);
5603 return output_millicode_call (insn,
5604 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5609 /* Output a $$rem millicode to do mod. */
5612 output_mod_insn (int unsignedp, rtx insn)
5616 import_milli (remU);
5617 return output_millicode_call (insn,
5618 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5622 import_milli (remI);
5623 return output_millicode_call (insn,
5624 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5629 output_arg_descriptor (rtx call_insn)
5631 const char *arg_regs[4];
5632 enum machine_mode arg_mode;
5634 int i, output_flag = 0;
5637 /* We neither need nor want argument location descriptors for the
5638 64bit runtime environment or the ELF32 environment. */
5639 if (TARGET_64BIT || TARGET_ELF32)
5642 for (i = 0; i < 4; i++)
5645 /* Specify explicitly that no argument relocations should take place
5646 if using the portable runtime calling conventions. */
5647 if (TARGET_PORTABLE_RUNTIME)
5649 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5654 if (GET_CODE (call_insn) != CALL_INSN)
5656 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5658 rtx use = XEXP (link, 0);
5660 if (! (GET_CODE (use) == USE
5661 && GET_CODE (XEXP (use, 0)) == REG
5662 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5665 arg_mode = GET_MODE (XEXP (use, 0));
5666 regno = REGNO (XEXP (use, 0));
5667 if (regno >= 23 && regno <= 26)
5669 arg_regs[26 - regno] = "GR";
5670 if (arg_mode == DImode)
5671 arg_regs[25 - regno] = "GR";
5673 else if (regno >= 32 && regno <= 39)
5675 if (arg_mode == SFmode)
5676 arg_regs[(regno - 32) / 2] = "FR";
5679 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5680 arg_regs[(regno - 34) / 2] = "FR";
5681 arg_regs[(regno - 34) / 2 + 1] = "FU";
5683 arg_regs[(regno - 34) / 2] = "FU";
5684 arg_regs[(regno - 34) / 2 + 1] = "FR";
5689 fputs ("\t.CALL ", asm_out_file);
5690 for (i = 0; i < 4; i++)
5695 fputc (',', asm_out_file);
5696 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5699 fputc ('\n', asm_out_file);
5702 /* Return the class of any secondary reload register that is needed to
5703 move IN into a register in class CLASS using mode MODE.
5705 Profiling has showed this routine and its descendants account for
5706 a significant amount of compile time (~7%). So it has been
5707 optimized to reduce redundant computations and eliminate useless
5710 It might be worthwhile to try and make this a leaf function too. */
5713 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5715 int regno, is_symbolic;
5717 /* Trying to load a constant into a FP register during PIC code
5718 generation will require %r1 as a scratch register. */
5720 && GET_MODE_CLASS (mode) == MODE_INT
5721 && FP_REG_CLASS_P (class)
5722 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5725 /* Profiling showed the PA port spends about 1.3% of its compilation
5726 time in true_regnum from calls inside secondary_reload_class. */
5728 if (GET_CODE (in) == REG)
5731 if (regno >= FIRST_PSEUDO_REGISTER)
5732 regno = true_regnum (in);
5734 else if (GET_CODE (in) == SUBREG)
5735 regno = true_regnum (in);
5739 /* If we have something like (mem (mem (...)), we can safely assume the
5740 inner MEM will end up in a general register after reloading, so there's
5741 no need for a secondary reload. */
5742 if (GET_CODE (in) == MEM
5743 && GET_CODE (XEXP (in, 0)) == MEM)
5746 /* Handle out of range displacement for integer mode loads/stores of
5748 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5749 && GET_MODE_CLASS (mode) == MODE_INT
5750 && FP_REG_CLASS_P (class))
5751 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5752 return GENERAL_REGS;
5754 /* A SAR<->FP register copy requires a secondary register (GPR) as
5755 well as secondary memory. */
5756 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5757 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5758 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5759 return GENERAL_REGS;
5761 if (GET_CODE (in) == HIGH)
5764 /* Profiling has showed GCC spends about 2.6% of its compilation
5765 time in symbolic_operand from calls inside secondary_reload_class.
5767 We use an inline copy and only compute its return value once to avoid
5769 switch (GET_CODE (in))
5779 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5780 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5781 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5791 && read_only_operand (in, VOIDmode))
5794 if (class != R1_REGS && is_symbolic)
5801 function_arg_padding (enum machine_mode mode, tree type)
5804 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5806 /* Return none if justification is not required. */
5808 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5809 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5812 /* The directions set here are ignored when a BLKmode argument larger
5813 than a word is placed in a register. Different code is used for
5814 the stack and registers. This makes it difficult to have a
5815 consistent data representation for both the stack and registers.
5816 For both runtimes, the justification and padding for arguments on
5817 the stack and in registers should be identical. */
5819 /* The 64-bit runtime specifies left justification for aggregates. */
5822 /* The 32-bit runtime architecture specifies right justification.
5823 When the argument is passed on the stack, the argument is padded
5824 with garbage on the left. The HP compiler pads with zeros. */
5828 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5835 /* Do what is necessary for `va_start'. We look at the current function
5836 to determine if stdargs or varargs is used and fill in an initial
5837 va_list. A pointer to this constructor is returned. */
5840 hppa_builtin_saveregs (void)
5843 tree fntype = TREE_TYPE (current_function_decl);
5844 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5845 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5846 != void_type_node)))
5847 ? UNITS_PER_WORD : 0);
5850 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5852 offset = current_function_arg_offset_rtx;
5858 /* Adjust for varargs/stdarg differences. */
5860 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5862 offset = current_function_arg_offset_rtx;
5864 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5865 from the incoming arg pointer and growing to larger addresses. */
5866 for (i = 26, off = -64; i >= 19; i--, off += 8)
5867 emit_move_insn (gen_rtx_MEM (word_mode,
5868 plus_constant (arg_pointer_rtx, off)),
5869 gen_rtx_REG (word_mode, i));
5871 /* The incoming args pointer points just beyond the flushback area;
5872 normally this is not a serious concern. However, when we are doing
5873 varargs/stdargs we want to make the arg pointer point to the start
5874 of the incoming argument area. */
5875 emit_move_insn (virtual_incoming_args_rtx,
5876 plus_constant (arg_pointer_rtx, -64));
5878 /* Now return a pointer to the first anonymous argument. */
5879 return copy_to_reg (expand_binop (Pmode, add_optab,
5880 virtual_incoming_args_rtx,
5881 offset, 0, 0, OPTAB_LIB_WIDEN));
5884 /* Store general registers on the stack. */
5885 dest = gen_rtx_MEM (BLKmode,
5886 plus_constant (current_function_internal_arg_pointer,
5888 set_mem_alias_set (dest, get_varargs_alias_set ());
5889 set_mem_align (dest, BITS_PER_WORD);
5890 move_block_from_reg (23, dest, 4);
5892 /* move_block_from_reg will emit code to store the argument registers
5893 individually as scalar stores.
5895 However, other insns may later load from the same addresses for
5896 a structure load (passing a struct to a varargs routine).
5898 The alias code assumes that such aliasing can never happen, so we
5899 have to keep memory referencing insns from moving up beyond the
5900 last argument register store. So we emit a blockage insn here. */
5901 emit_insn (gen_blockage ());
5903 return copy_to_reg (expand_binop (Pmode, add_optab,
5904 current_function_internal_arg_pointer,
5905 offset, 0, 0, OPTAB_LIB_WIDEN));
5909 hppa_va_start (tree valist, rtx nextarg)
5911 nextarg = expand_builtin_saveregs ();
5912 std_expand_builtin_va_start (valist, nextarg);
5916 hppa_va_arg (tree valist, tree type)
5918 HOST_WIDE_INT size = int_size_in_bytes (type);
5924 /* Every argument in PA64 is supposed to be passed by value
5925 (including large structs). However, as a GCC extension, we
5926 pass zero and variable sized arguments by reference. Empty
5927 structures are a GCC extension not supported by the HP
5928 compilers. Thus, passing them by reference isn't likely
5929 to conflict with the ABI. For variable sized arguments,
5930 GCC doesn't have the infrastructure to allocate these to
5933 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5935 if (size > UNITS_PER_WORD)
5937 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5938 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5939 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5940 build_int_2 (-2 * UNITS_PER_WORD, -1));
5941 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5942 TREE_SIDE_EFFECTS (t) = 1;
5943 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5947 return std_expand_builtin_va_arg (valist, type);
5950 ptr = build_pointer_type (type);
5952 /* Args grow upward. */
5953 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5954 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5955 TREE_SIDE_EFFECTS (t) = 1;
5957 pptr = build_pointer_type (ptr);
5958 t = build1 (NOP_EXPR, pptr, t);
5959 TREE_SIDE_EFFECTS (t) = 1;
5961 t = build1 (INDIRECT_REF, ptr, t);
5962 TREE_SIDE_EFFECTS (t) = 1;
5965 else /* !TARGET_64BIT */
5967 ptr = build_pointer_type (type);
5969 /* "Large" and variable sized types are passed by reference. */
5970 if (size > 8 || size <= 0)
5972 /* Args grow downward. */
5973 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5974 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5975 TREE_SIDE_EFFECTS (t) = 1;
5977 pptr = build_pointer_type (ptr);
5978 t = build1 (NOP_EXPR, pptr, t);
5979 TREE_SIDE_EFFECTS (t) = 1;
5981 t = build1 (INDIRECT_REF, ptr, t);
5982 TREE_SIDE_EFFECTS (t) = 1;
5986 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5987 build_int_2 (-size, -1));
5989 /* Copied from va-pa.h, but we probably don't need to align to
5990 word size, since we generate and preserve that invariant. */
5991 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5992 build_int_2 ((size > 4 ? -8 : -4), -1));
5994 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5995 TREE_SIDE_EFFECTS (t) = 1;
5997 ofs = (8 - size) % 4;
6000 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
6001 build_int_2 (ofs, 0));
6002 TREE_SIDE_EFFECTS (t) = 1;
6005 t = build1 (NOP_EXPR, ptr, t);
6006 TREE_SIDE_EFFECTS (t) = 1;
6011 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6016 /* This routine handles all the normal conditional branch sequences we
6017 might need to generate. It handles compare immediate vs compare
6018 register, nullification of delay slots, varying length branches,
6019 negated branches, and all combinations of the above. It returns the
6020 output appropriate to emit the branch corresponding to all given
6024 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
6026 static char buf[100];
6030 /* A conditional branch to the following instruction (eg the delay slot)
6031 is asking for a disaster. This can happen when not optimizing and
6032 when jump optimization fails.
6034 While it is usually safe to emit nothing, this can fail if the
6035 preceding instruction is a nullified branch with an empty delay
6036 slot and the same branch target as this branch. We could check
6037 for this but jump optimization should eliminate nop jumps. It
6038 is always safe to emit a nop. */
6039 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6042 /* The doubleword form of the cmpib instruction doesn't have the LEU
6043 and GTU conditions while the cmpb instruction does. Since we accept
6044 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6045 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6046 operands[2] = gen_rtx_REG (DImode, 0);
6048 /* If this is a long branch with its delay slot unfilled, set `nullify'
6049 as it can nullify the delay slot and save a nop. */
6050 if (length == 8 && dbr_sequence_length () == 0)
6053 /* If this is a short forward conditional branch which did not get
6054 its delay slot filled, the delay slot can still be nullified. */
6055 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6056 nullify = forward_branch_p (insn);
6058 /* A forward branch over a single nullified insn can be done with a
6059 comclr instruction. This avoids a single cycle penalty due to
6060 mis-predicted branch if we fall through (branch not taken). */
6062 && next_real_insn (insn) != 0
6063 && get_attr_length (next_real_insn (insn)) == 4
6064 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6070 /* All short conditional branches except backwards with an unfilled
6074 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6076 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6077 if (GET_MODE (operands[1]) == DImode)
6080 strcat (buf, "%B3");
6082 strcat (buf, "%S3");
6084 strcat (buf, " %2,%r1,%%r0");
6086 strcat (buf, ",n %2,%r1,%0");
6088 strcat (buf, " %2,%r1,%0");
6091 /* All long conditionals. Note a short backward branch with an
6092 unfilled delay slot is treated just like a long backward branch
6093 with an unfilled delay slot. */
6095 /* Handle weird backwards branch with a filled delay slot
6096 with is nullified. */
6097 if (dbr_sequence_length () != 0
6098 && ! forward_branch_p (insn)
6101 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6102 if (GET_MODE (operands[1]) == DImode)
6105 strcat (buf, "%S3");
6107 strcat (buf, "%B3");
6108 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6110 /* Handle short backwards branch with an unfilled delay slot.
6111 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6112 taken and untaken branches. */
6113 else if (dbr_sequence_length () == 0
6114 && ! forward_branch_p (insn)
6115 && INSN_ADDRESSES_SET_P ()
6116 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6117 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6119 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6120 if (GET_MODE (operands[1]) == DImode)
6123 strcat (buf, "%B3 %2,%r1,%0%#");
6125 strcat (buf, "%S3 %2,%r1,%0%#");
6129 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6130 if (GET_MODE (operands[1]) == DImode)
6133 strcat (buf, "%S3");
6135 strcat (buf, "%B3");
6137 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6139 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6145 xoperands[0] = operands[0];
6146 xoperands[1] = operands[1];
6147 xoperands[2] = operands[2];
6148 xoperands[3] = operands[3];
6150 /* The reversed conditional branch must branch over one additional
6151 instruction if the delay slot is filled. If the delay slot
6152 is empty, the instruction after the reversed condition branch
6153 must be nullified. */
6154 nullify = dbr_sequence_length () == 0;
6155 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6157 /* Create a reversed conditional branch which branches around
6158 the following insns. */
6159 if (GET_MODE (operands[1]) != DImode)
6165 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6168 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6174 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6177 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6186 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6189 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6195 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6198 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6202 output_asm_insn (buf, xoperands);
6203 return output_lbranch (operands[0], insn);
6211 /* This routine handles long unconditional branches that exceed the
6212 maximum range of a simple branch instruction. */
6215 output_lbranch (rtx dest, rtx insn)
6219 xoperands[0] = dest;
6221 /* First, free up the delay slot. */
6222 if (dbr_sequence_length () != 0)
6224 /* We can't handle a jump in the delay slot. */
6225 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
6228 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6229 optimize, 0, 0, NULL);
6231 /* Now delete the delay insn. */
6232 PUT_CODE (NEXT_INSN (insn), NOTE);
6233 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6234 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6237 /* Output an insn to save %r1. The runtime documentation doesn't
6238 specify whether the "Clean Up" slot in the callers frame can
6239 be clobbered by the callee. It isn't copied by HP's builtin
6240 alloca, so this suggests that it can be clobbered if necessary.
6241 The "Static Link" location is copied by HP builtin alloca, so
6242 we avoid using it. Using the cleanup slot might be a problem
6243 if we have to interoperate with languages that pass cleanup
6244 information. However, it should be possible to handle these
6245 situations with GCC's asm feature.
6247 The "Current RP" slot is reserved for the called procedure, so
6248 we try to use it when we don't have a frame of our own. It's
6249 rather unlikely that we won't have a frame when we need to emit
6252 Really the way to go long term is a register scavenger; goto
6253 the target of the jump and find a register which we can use
6254 as a scratch to hold the value in %r1. Then, we wouldn't have
6255 to free up the delay slot or clobber a slot that may be needed
6256 for other purposes. */
6259 if (actual_fsize == 0 && !regs_ever_live[2])
6260 /* Use the return pointer slot in the frame marker. */
6261 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6263 /* Use the slot at -40 in the frame marker since HP builtin
6264 alloca doesn't copy it. */
6265 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6269 if (actual_fsize == 0 && !regs_ever_live[2])
6270 /* Use the return pointer slot in the frame marker. */
6271 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6273 /* Use the "Clean Up" slot in the frame marker. In GCC,
6274 the only other use of this location is for copying a
6275 floating point double argument from a floating-point
6276 register to two general registers. The copy is done
6277 as an "atomic" operation when outputting a call, so it
6278 won't interfere with our using the location here. */
6279 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6282 if (TARGET_PORTABLE_RUNTIME)
6284 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6285 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6286 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6290 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6291 if (TARGET_SOM || !TARGET_GAS)
6293 xoperands[1] = gen_label_rtx ();
6294 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6295 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6296 CODE_LABEL_NUMBER (xoperands[1]));
6297 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6301 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6302 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6304 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6307 /* Now output a very long branch to the original target. */
6308 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6310 /* Now restore the value of %r1 in the delay slot. */
6313 if (actual_fsize == 0 && !regs_ever_live[2])
6314 return "ldd -16(%%r30),%%r1";
6316 return "ldd -40(%%r30),%%r1";
6320 if (actual_fsize == 0 && !regs_ever_live[2])
6321 return "ldw -20(%%r30),%%r1";
6323 return "ldw -12(%%r30),%%r1";
6327 /* This routine handles all the branch-on-bit conditional branch sequences we
6328 might need to generate. It handles nullification of delay slots,
6329 varying length branches, negated branches and all combinations of the
6330 above. it returns the appropriate output template to emit the branch. */
6333 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6334 int negated, rtx insn, int which)
6336 static char buf[100];
6339 /* A conditional branch to the following instruction (eg the delay slot) is
6340 asking for a disaster. I do not think this can happen as this pattern
6341 is only used when optimizing; jump optimization should eliminate the
6342 jump. But be prepared just in case. */
6344 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6347 /* If this is a long branch with its delay slot unfilled, set `nullify'
6348 as it can nullify the delay slot and save a nop. */
6349 if (length == 8 && dbr_sequence_length () == 0)
6352 /* If this is a short forward conditional branch which did not get
6353 its delay slot filled, the delay slot can still be nullified. */
6354 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6355 nullify = forward_branch_p (insn);
6357 /* A forward branch over a single nullified insn can be done with a
6358 extrs instruction. This avoids a single cycle penalty due to
6359 mis-predicted branch if we fall through (branch not taken). */
6362 && next_real_insn (insn) != 0
6363 && get_attr_length (next_real_insn (insn)) == 4
6364 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6371 /* All short conditional branches except backwards with an unfilled
6375 strcpy (buf, "{extrs,|extrw,s,}");
6377 strcpy (buf, "bb,");
6378 if (useskip && GET_MODE (operands[0]) == DImode)
6379 strcpy (buf, "extrd,s,*");
6380 else if (GET_MODE (operands[0]) == DImode)
6381 strcpy (buf, "bb,*");
6382 if ((which == 0 && negated)
6383 || (which == 1 && ! negated))
6388 strcat (buf, " %0,%1,1,%%r0");
6389 else if (nullify && negated)
6390 strcat (buf, ",n %0,%1,%3");
6391 else if (nullify && ! negated)
6392 strcat (buf, ",n %0,%1,%2");
6393 else if (! nullify && negated)
6394 strcat (buf, "%0,%1,%3");
6395 else if (! nullify && ! negated)
6396 strcat (buf, " %0,%1,%2");
6399 /* All long conditionals. Note a short backward branch with an
6400 unfilled delay slot is treated just like a long backward branch
6401 with an unfilled delay slot. */
6403 /* Handle weird backwards branch with a filled delay slot
6404 with is nullified. */
6405 if (dbr_sequence_length () != 0
6406 && ! forward_branch_p (insn)
6409 strcpy (buf, "bb,");
6410 if (GET_MODE (operands[0]) == DImode)
6412 if ((which == 0 && negated)
6413 || (which == 1 && ! negated))
6418 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6420 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6422 /* Handle short backwards branch with an unfilled delay slot.
6423 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6424 taken and untaken branches. */
6425 else if (dbr_sequence_length () == 0
6426 && ! forward_branch_p (insn)
6427 && INSN_ADDRESSES_SET_P ()
6428 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6429 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6431 strcpy (buf, "bb,");
6432 if (GET_MODE (operands[0]) == DImode)
6434 if ((which == 0 && negated)
6435 || (which == 1 && ! negated))
6440 strcat (buf, " %0,%1,%3%#");
6442 strcat (buf, " %0,%1,%2%#");
6446 strcpy (buf, "{extrs,|extrw,s,}");
6447 if (GET_MODE (operands[0]) == DImode)
6448 strcpy (buf, "extrd,s,*");
6449 if ((which == 0 && negated)
6450 || (which == 1 && ! negated))
6454 if (nullify && negated)
6455 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6456 else if (nullify && ! negated)
6457 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6459 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6461 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6471 /* This routine handles all the branch-on-variable-bit conditional branch
6472 sequences we might need to generate. It handles nullification of delay
6473 slots, varying length branches, negated branches and all combinations
6474 of the above. it returns the appropriate output template to emit the
6478 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6479 int negated, rtx insn, int which)
6481 static char buf[100];
6484 /* A conditional branch to the following instruction (eg the delay slot) is
6485 asking for a disaster. I do not think this can happen as this pattern
6486 is only used when optimizing; jump optimization should eliminate the
6487 jump. But be prepared just in case. */
6489 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6492 /* If this is a long branch with its delay slot unfilled, set `nullify'
6493 as it can nullify the delay slot and save a nop. */
6494 if (length == 8 && dbr_sequence_length () == 0)
6497 /* If this is a short forward conditional branch which did not get
6498 its delay slot filled, the delay slot can still be nullified. */
6499 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6500 nullify = forward_branch_p (insn);
6502 /* A forward branch over a single nullified insn can be done with a
6503 extrs instruction. This avoids a single cycle penalty due to
6504 mis-predicted branch if we fall through (branch not taken). */
6507 && next_real_insn (insn) != 0
6508 && get_attr_length (next_real_insn (insn)) == 4
6509 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6516 /* All short conditional branches except backwards with an unfilled
6520 strcpy (buf, "{vextrs,|extrw,s,}");
6522 strcpy (buf, "{bvb,|bb,}");
6523 if (useskip && GET_MODE (operands[0]) == DImode)
6524 strcpy (buf, "extrd,s,*");
6525 else if (GET_MODE (operands[0]) == DImode)
6526 strcpy (buf, "bb,*");
6527 if ((which == 0 && negated)
6528 || (which == 1 && ! negated))
6533 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6534 else if (nullify && negated)
6535 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6536 else if (nullify && ! negated)
6537 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6538 else if (! nullify && negated)
6539 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6540 else if (! nullify && ! negated)
6541 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6544 /* All long conditionals. Note a short backward branch with an
6545 unfilled delay slot is treated just like a long backward branch
6546 with an unfilled delay slot. */
6548 /* Handle weird backwards branch with a filled delay slot
6549 with is nullified. */
6550 if (dbr_sequence_length () != 0
6551 && ! forward_branch_p (insn)
6554 strcpy (buf, "{bvb,|bb,}");
6555 if (GET_MODE (operands[0]) == DImode)
6557 if ((which == 0 && negated)
6558 || (which == 1 && ! negated))
6563 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6565 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6567 /* Handle short backwards branch with an unfilled delay slot.
6568 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6569 taken and untaken branches. */
6570 else if (dbr_sequence_length () == 0
6571 && ! forward_branch_p (insn)
6572 && INSN_ADDRESSES_SET_P ()
6573 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6574 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6576 strcpy (buf, "{bvb,|bb,}");
6577 if (GET_MODE (operands[0]) == DImode)
6579 if ((which == 0 && negated)
6580 || (which == 1 && ! negated))
6585 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6587 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6591 strcpy (buf, "{vextrs,|extrw,s,}");
6592 if (GET_MODE (operands[0]) == DImode)
6593 strcpy (buf, "extrd,s,*");
6594 if ((which == 0 && negated)
6595 || (which == 1 && ! negated))
6599 if (nullify && negated)
6600 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6601 else if (nullify && ! negated)
6602 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6604 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6606 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6616 /* Return the output template for emitting a dbra type insn.
6618 Note it may perform some output operations on its own before
6619 returning the final output string. */
6621 output_dbra (rtx *operands, rtx insn, int which_alternative)
6624 /* A conditional branch to the following instruction (eg the delay slot) is
6625 asking for a disaster. Be prepared! */
6627 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6629 if (which_alternative == 0)
6630 return "ldo %1(%0),%0";
6631 else if (which_alternative == 1)
6633 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6634 output_asm_insn ("ldw -16(%%r30),%4", operands);
6635 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6636 return "{fldws|fldw} -16(%%r30),%0";
6640 output_asm_insn ("ldw %0,%4", operands);
6641 return "ldo %1(%4),%4\n\tstw %4,%0";
6645 if (which_alternative == 0)
6647 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6648 int length = get_attr_length (insn);
6650 /* If this is a long branch with its delay slot unfilled, set `nullify'
6651 as it can nullify the delay slot and save a nop. */
6652 if (length == 8 && dbr_sequence_length () == 0)
6655 /* If this is a short forward conditional branch which did not get
6656 its delay slot filled, the delay slot can still be nullified. */
6657 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6658 nullify = forward_branch_p (insn);
6660 /* Handle short versions first. */
6661 if (length == 4 && nullify)
6662 return "addib,%C2,n %1,%0,%3";
6663 else if (length == 4 && ! nullify)
6664 return "addib,%C2 %1,%0,%3";
6665 else if (length == 8)
6667 /* Handle weird backwards branch with a fulled delay slot
6668 which is nullified. */
6669 if (dbr_sequence_length () != 0
6670 && ! forward_branch_p (insn)
6672 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6673 /* Handle short backwards branch with an unfilled delay slot.
6674 Using a addb;nop rather than addi;bl saves 1 cycle for both
6675 taken and untaken branches. */
6676 else if (dbr_sequence_length () == 0
6677 && ! forward_branch_p (insn)
6678 && INSN_ADDRESSES_SET_P ()
6679 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6680 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6681 return "addib,%C2 %1,%0,%3%#";
6683 /* Handle normal cases. */
6685 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6687 return "addi,%N2 %1,%0,%0\n\tb %3";
6692 /* Deal with gross reload from FP register case. */
6693 else if (which_alternative == 1)
6695 /* Move loop counter from FP register to MEM then into a GR,
6696 increment the GR, store the GR into MEM, and finally reload
6697 the FP register from MEM from within the branch's delay slot. */
6698 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6700 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6701 if (get_attr_length (insn) == 24)
6702 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6704 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6706 /* Deal with gross reload from memory case. */
6709 /* Reload loop counter from memory, the store back to memory
6710 happens in the branch's delay slot. */
6711 output_asm_insn ("ldw %0,%4", operands);
6712 if (get_attr_length (insn) == 12)
6713 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6715 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6719 /* Return the output template for emitting a dbra type insn.
6721 Note it may perform some output operations on its own before
6722 returning the final output string. */
6724 output_movb (rtx *operands, rtx insn, int which_alternative,
6725 int reverse_comparison)
6728 /* A conditional branch to the following instruction (eg the delay slot) is
6729 asking for a disaster. Be prepared! */
6731 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6733 if (which_alternative == 0)
6734 return "copy %1,%0";
6735 else if (which_alternative == 1)
6737 output_asm_insn ("stw %1,-16(%%r30)", operands);
6738 return "{fldws|fldw} -16(%%r30),%0";
6740 else if (which_alternative == 2)
6746 /* Support the second variant. */
6747 if (reverse_comparison)
6748 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6750 if (which_alternative == 0)
6752 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6753 int length = get_attr_length (insn);
6755 /* If this is a long branch with its delay slot unfilled, set `nullify'
6756 as it can nullify the delay slot and save a nop. */
6757 if (length == 8 && dbr_sequence_length () == 0)
6760 /* If this is a short forward conditional branch which did not get
6761 its delay slot filled, the delay slot can still be nullified. */
6762 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6763 nullify = forward_branch_p (insn);
6765 /* Handle short versions first. */
6766 if (length == 4 && nullify)
6767 return "movb,%C2,n %1,%0,%3";
6768 else if (length == 4 && ! nullify)
6769 return "movb,%C2 %1,%0,%3";
6770 else if (length == 8)
6772 /* Handle weird backwards branch with a filled delay slot
6773 which is nullified. */
6774 if (dbr_sequence_length () != 0
6775 && ! forward_branch_p (insn)
6777 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6779 /* Handle short backwards branch with an unfilled delay slot.
6780 Using a movb;nop rather than or;bl saves 1 cycle for both
6781 taken and untaken branches. */
6782 else if (dbr_sequence_length () == 0
6783 && ! forward_branch_p (insn)
6784 && INSN_ADDRESSES_SET_P ()
6785 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6786 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6787 return "movb,%C2 %1,%0,%3%#";
6788 /* Handle normal cases. */
6790 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6792 return "or,%N2 %1,%%r0,%0\n\tb %3";
6797 /* Deal with gross reload from FP register case. */
6798 else if (which_alternative == 1)
6800 /* Move loop counter from FP register to MEM then into a GR,
6801 increment the GR, store the GR into MEM, and finally reload
6802 the FP register from MEM from within the branch's delay slot. */
6803 output_asm_insn ("stw %1,-16(%%r30)", operands);
6804 if (get_attr_length (insn) == 12)
6805 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6807 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6809 /* Deal with gross reload from memory case. */
6810 else if (which_alternative == 2)
6812 /* Reload loop counter from memory, the store back to memory
6813 happens in the branch's delay slot. */
6814 if (get_attr_length (insn) == 8)
6815 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6817 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6819 /* Handle SAR as a destination. */
6822 if (get_attr_length (insn) == 8)
6823 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6825 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6829 /* Copy any FP arguments in INSN into integer registers. */
6831 copy_fp_args (rtx insn)
6836 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6838 int arg_mode, regno;
6839 rtx use = XEXP (link, 0);
6841 if (! (GET_CODE (use) == USE
6842 && GET_CODE (XEXP (use, 0)) == REG
6843 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6846 arg_mode = GET_MODE (XEXP (use, 0));
6847 regno = REGNO (XEXP (use, 0));
6849 /* Is it a floating point register? */
6850 if (regno >= 32 && regno <= 39)
6852 /* Copy the FP register into an integer register via memory. */
6853 if (arg_mode == SFmode)
6855 xoperands[0] = XEXP (use, 0);
6856 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6857 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6858 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6862 xoperands[0] = XEXP (use, 0);
6863 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6864 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6865 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6866 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6872 /* Compute length of the FP argument copy sequence for INSN. */
6874 length_fp_args (rtx insn)
6879 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6881 int arg_mode, regno;
6882 rtx use = XEXP (link, 0);
6884 if (! (GET_CODE (use) == USE
6885 && GET_CODE (XEXP (use, 0)) == REG
6886 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6889 arg_mode = GET_MODE (XEXP (use, 0));
6890 regno = REGNO (XEXP (use, 0));
6892 /* Is it a floating point register? */
6893 if (regno >= 32 && regno <= 39)
6895 if (arg_mode == SFmode)
6905 /* Return the attribute length for the millicode call instruction INSN.
6906 The length must match the code generated by output_millicode_call.
6907 We include the delay slot in the returned length as it is better to
6908 over estimate the length than to under estimate it. */
6911 attr_length_millicode_call (rtx insn)
6913 unsigned long distance = -1;
6914 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6916 if (INSN_ADDRESSES_SET_P ())
6918 distance = (total + insn_current_reference_address (insn));
6919 if (distance < total)
6925 if (!TARGET_LONG_CALLS && distance < 7600000)
6930 else if (TARGET_PORTABLE_RUNTIME)
6934 if (!TARGET_LONG_CALLS && distance < 240000)
6937 if (TARGET_LONG_ABS_CALL && !flag_pic)
6944 /* INSN is a function call. It may have an unconditional jump
6947 CALL_DEST is the routine we are calling. */
6950 output_millicode_call (rtx insn, rtx call_dest)
6952 int attr_length = get_attr_length (insn);
6953 int seq_length = dbr_sequence_length ();
6958 xoperands[0] = call_dest;
6959 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6961 /* Handle the common case where we are sure that the branch will
6962 reach the beginning of the $CODE$ subspace. The within reach
6963 form of the $$sh_func_adrs call has a length of 28. Because
6964 it has an attribute type of multi, it never has a nonzero
6965 sequence length. The length of the $$sh_func_adrs is the same
6966 as certain out of reach PIC calls to other routines. */
6967 if (!TARGET_LONG_CALLS
6968 && ((seq_length == 0
6969 && (attr_length == 12
6970 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6971 || (seq_length != 0 && attr_length == 8)))
6973 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6979 /* It might seem that one insn could be saved by accessing
6980 the millicode function using the linkage table. However,
6981 this doesn't work in shared libraries and other dynamically
6982 loaded objects. Using a pc-relative sequence also avoids
6983 problems related to the implicit use of the gp register. */
6984 output_asm_insn ("b,l .+8,%%r1", xoperands);
6988 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6989 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6993 xoperands[1] = gen_label_rtx ();
6994 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6995 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6996 CODE_LABEL_NUMBER (xoperands[1]));
6997 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7000 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7002 else if (TARGET_PORTABLE_RUNTIME)
7004 /* Pure portable runtime doesn't allow be/ble; we also don't
7005 have PIC support in the assembler/linker, so this sequence
7008 /* Get the address of our target into %r1. */
7009 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7010 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7012 /* Get our return address into %r31. */
7013 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7014 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7016 /* Jump to our target address in %r1. */
7017 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7021 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7023 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7025 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7029 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7030 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7032 if (TARGET_SOM || !TARGET_GAS)
7034 /* The HP assembler can generate relocations for the
7035 difference of two symbols. GAS can do this for a
7036 millicode symbol but not an arbitrary external
7037 symbol when generating SOM output. */
7038 xoperands[1] = gen_label_rtx ();
7039 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7040 CODE_LABEL_NUMBER (xoperands[1]));
7041 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7042 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7046 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7047 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7051 /* Jump to our target address in %r1. */
7052 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7056 if (seq_length == 0)
7057 output_asm_insn ("nop", xoperands);
7059 /* We are done if there isn't a jump in the delay slot. */
7060 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7063 /* This call has an unconditional jump in its delay slot. */
7064 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7066 /* See if the return address can be adjusted. Use the containing
7067 sequence insn's address. */
7068 if (INSN_ADDRESSES_SET_P ())
7070 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7071 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7072 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7074 if (VAL_14_BITS_P (distance))
7076 xoperands[1] = gen_label_rtx ();
7077 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7078 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7079 CODE_LABEL_NUMBER (xoperands[1]));
7082 /* ??? This branch may not reach its target. */
7083 output_asm_insn ("nop\n\tb,n %0", xoperands);
7086 /* ??? This branch may not reach its target. */
7087 output_asm_insn ("nop\n\tb,n %0", xoperands);
7089 /* Delete the jump. */
7090 PUT_CODE (NEXT_INSN (insn), NOTE);
7091 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7092 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7097 /* Return the attribute length of the call instruction INSN. The SIBCALL
7098 flag indicates whether INSN is a regular call or a sibling call. The
7099 length returned must be longer than the code actually generated by
7100 output_call. Since branch shortening is done before delay branch
7101 sequencing, there is no way to determine whether or not the delay
7102 slot will be filled during branch shortening. Even when the delay
7103 slot is filled, we may have to add a nop if the delay slot contains
7104 a branch that can't reach its target. Thus, we always have to include
7105 the delay slot in the length estimate. This used to be done in
7106 pa_adjust_insn_length but we do it here now as some sequences always
7107 fill the delay slot and we can save four bytes in the estimate for
7111 attr_length_call (rtx insn, int sibcall)
7117 rtx pat = PATTERN (insn);
7118 unsigned long distance = -1;
7120 if (INSN_ADDRESSES_SET_P ())
7122 unsigned long total;
7124 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7125 distance = (total + insn_current_reference_address (insn));
7126 if (distance < total)
7130 /* Determine if this is a local call. */
7131 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7132 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7134 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7136 call_decl = SYMBOL_REF_DECL (call_dest);
7137 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7139 /* pc-relative branch. */
7140 if (!TARGET_LONG_CALLS
7141 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7142 || distance < 240000))
7145 /* 64-bit plabel sequence. */
7146 else if (TARGET_64BIT && !local_call)
7147 length += sibcall ? 28 : 24;
7149 /* non-pic long absolute branch sequence. */
7150 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7153 /* long pc-relative branch sequence. */
7154 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7155 || (TARGET_64BIT && !TARGET_GAS)
7156 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7160 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7164 /* 32-bit plabel sequence. */
7170 length += length_fp_args (insn);
7180 if (!TARGET_NO_SPACE_REGS)
7188 /* INSN is a function call. It may have an unconditional jump
7191 CALL_DEST is the routine we are calling. */
7194 output_call (rtx insn, rtx call_dest, int sibcall)
7196 int delay_insn_deleted = 0;
7197 int delay_slot_filled = 0;
7198 int seq_length = dbr_sequence_length ();
7199 tree call_decl = SYMBOL_REF_DECL (call_dest);
7200 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7203 xoperands[0] = call_dest;
7205 /* Handle the common case where we're sure that the branch will reach
7206 the beginning of the "$CODE$" subspace. This is the beginning of
7207 the current function if we are in a named section. */
7208 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7210 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7211 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7215 if (TARGET_64BIT && !local_call)
7217 /* ??? As far as I can tell, the HP linker doesn't support the
7218 long pc-relative sequence described in the 64-bit runtime
7219 architecture. So, we use a slightly longer indirect call. */
7220 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7222 xoperands[0] = p->internal_label;
7223 xoperands[1] = gen_label_rtx ();
7225 /* If this isn't a sibcall, we put the load of %r27 into the
7226 delay slot. We can't do this in a sibcall as we don't
7227 have a second call-clobbered scratch register available. */
7229 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7232 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7233 optimize, 0, 0, NULL);
7235 /* Now delete the delay insn. */
7236 PUT_CODE (NEXT_INSN (insn), NOTE);
7237 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7238 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7239 delay_insn_deleted = 1;
7242 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7243 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7244 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7248 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7249 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7250 output_asm_insn ("bve (%%r1)", xoperands);
7254 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7255 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7256 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7257 delay_slot_filled = 1;
7262 int indirect_call = 0;
7264 /* Emit a long call. There are several different sequences
7265 of increasing length and complexity. In most cases,
7266 they don't allow an instruction in the delay slot. */
7267 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7268 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7269 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7274 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7276 && (!TARGET_PA_20 || indirect_call))
7278 /* A non-jump insn in the delay slot. By definition we can
7279 emit this insn before the call (and in fact before argument
7281 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0,
7284 /* Now delete the delay insn. */
7285 PUT_CODE (NEXT_INSN (insn), NOTE);
7286 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7287 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7288 delay_insn_deleted = 1;
7291 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7293 /* This is the best sequence for making long calls in
7294 non-pic code. Unfortunately, GNU ld doesn't provide
7295 the stub needed for external calls, and GAS's support
7296 for this with the SOM linker is buggy. It is safe
7297 to use this for local calls. */
7298 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7300 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7304 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7307 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7309 output_asm_insn ("copy %%r31,%%r2", xoperands);
7310 delay_slot_filled = 1;
7315 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7316 || (TARGET_64BIT && !TARGET_GAS))
7318 /* The HP assembler and linker can handle relocations
7319 for the difference of two symbols. GAS and the HP
7320 linker can't do this when one of the symbols is
7322 xoperands[1] = gen_label_rtx ();
7323 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7324 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7325 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7326 CODE_LABEL_NUMBER (xoperands[1]));
7327 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7329 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7331 /* GAS currently can't generate the relocations that
7332 are needed for the SOM linker under HP-UX using this
7333 sequence. The GNU linker doesn't generate the stubs
7334 that are needed for external calls on TARGET_ELF32
7335 with this sequence. For now, we have to use a
7336 longer plabel sequence when using GAS. */
7337 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7338 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7340 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7345 /* Emit a long plabel-based call sequence. This is
7346 essentially an inline implementation of $$dyncall.
7347 We don't actually try to call $$dyncall as this is
7348 as difficult as calling the function itself. */
7349 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7351 xoperands[0] = p->internal_label;
7352 xoperands[1] = gen_label_rtx ();
7354 /* Since the call is indirect, FP arguments in registers
7355 need to be copied to the general registers. Then, the
7356 argument relocation stub will copy them back. */
7358 copy_fp_args (insn);
7362 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7363 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7364 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7368 output_asm_insn ("addil LR'%0-$global$,%%r27",
7370 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7374 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7375 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7376 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7377 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7379 if (!sibcall && !TARGET_PA_20)
7381 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7382 if (TARGET_NO_SPACE_REGS)
7383 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7385 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7392 output_asm_insn ("bve (%%r1)", xoperands);
7397 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7398 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7399 delay_slot_filled = 1;
7402 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7407 if (!TARGET_NO_SPACE_REGS)
7408 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7413 if (TARGET_NO_SPACE_REGS)
7414 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7416 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7420 if (TARGET_NO_SPACE_REGS)
7421 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7423 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7426 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7428 output_asm_insn ("copy %%r31,%%r2", xoperands);
7429 delay_slot_filled = 1;
7436 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7437 output_asm_insn ("nop", xoperands);
7439 /* We are done if there isn't a jump in the delay slot. */
7441 || delay_insn_deleted
7442 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7445 /* A sibcall should never have a branch in the delay slot. */
7449 /* This call has an unconditional jump in its delay slot. */
7450 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7452 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7454 /* See if the return address can be adjusted. Use the containing
7455 sequence insn's address. */
7456 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7457 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7458 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7460 if (VAL_14_BITS_P (distance))
7462 xoperands[1] = gen_label_rtx ();
7463 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7464 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7465 CODE_LABEL_NUMBER (xoperands[1]));
7468 output_asm_insn ("nop\n\tb,n %0", xoperands);
7471 output_asm_insn ("b,n %0", xoperands);
7473 /* Delete the jump. */
7474 PUT_CODE (NEXT_INSN (insn), NOTE);
7475 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7476 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7481 /* Return the attribute length of the indirect call instruction INSN.
7482 The length must match the code generated by output_indirect call.
7483 The returned length includes the delay slot. Currently, the delay
7484 slot of an indirect call sequence is not exposed and it is used by
7485 the sequence itself. */
7488 attr_length_indirect_call (rtx insn)
7490 unsigned long distance = -1;
7491 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7493 if (INSN_ADDRESSES_SET_P ())
7495 distance = (total + insn_current_reference_address (insn));
7496 if (distance < total)
7503 if (TARGET_FAST_INDIRECT_CALLS
7504 || (!TARGET_PORTABLE_RUNTIME
7505 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7511 if (TARGET_PORTABLE_RUNTIME)
7514 /* Out of reach, can use ble. */
7519 output_indirect_call (rtx insn, rtx call_dest)
7525 xoperands[0] = call_dest;
7526 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7527 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7531 /* First the special case for kernels, level 0 systems, etc. */
7532 if (TARGET_FAST_INDIRECT_CALLS)
7533 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7535 /* Now the normal case -- we can reach $$dyncall directly or
7536 we're sure that we can get there via a long-branch stub.
7538 No need to check target flags as the length uniquely identifies
7539 the remaining cases. */
7540 if (attr_length_indirect_call (insn) == 8)
7541 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7543 /* Long millicode call, but we are not generating PIC or portable runtime
7545 if (attr_length_indirect_call (insn) == 12)
7546 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7548 /* Long millicode call for portable runtime. */
7549 if (attr_length_indirect_call (insn) == 20)
7550 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7552 /* We need a long PIC call to $$dyncall. */
7553 xoperands[0] = NULL_RTX;
7554 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7555 if (TARGET_SOM || !TARGET_GAS)
7557 xoperands[0] = gen_label_rtx ();
7558 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7559 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7560 CODE_LABEL_NUMBER (xoperands[0]));
7561 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7565 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7566 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7569 output_asm_insn ("blr %%r0,%%r2", xoperands);
7570 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7574 /* Return the total length of the save and restore instructions needed for
7575 the data linkage table pointer (i.e., the PIC register) across the call
7576 instruction INSN. No-return calls do not require a save and restore.
7577 In addition, we may be able to avoid the save and restore for calls
7578 within the same translation unit. */
7581 attr_length_save_restore_dltp (rtx insn)
7583 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7589 /* In HPUX 8.0's shared library scheme, special relocations are needed
7590 for function labels if they might be passed to a function
7591 in a shared library (because shared libraries don't live in code
7592 space), and special magic is needed to construct their address. */
7595 hppa_encode_label (rtx sym)
7597 const char *str = XSTR (sym, 0);
7598 int len = strlen (str) + 1;
7601 p = newstr = alloca (len + 1);
7605 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7609 pa_encode_section_info (tree decl, rtx rtl, int first)
7611 if (first && TEXT_SPACE_P (decl))
7613 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7614 if (TREE_CODE (decl) == FUNCTION_DECL)
7615 hppa_encode_label (XEXP (rtl, 0));
7619 /* This is sort of inverse to pa_encode_section_info. */
7622 pa_strip_name_encoding (const char *str)
7624 str += (*str == '@');
7625 str += (*str == '*');
7630 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7632 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7635 /* Returns 1 if OP is a function label involved in a simple addition
7636 with a constant. Used to keep certain patterns from matching
7637 during instruction combination. */
7639 is_function_label_plus_const (rtx op)
7641 /* Strip off any CONST. */
7642 if (GET_CODE (op) == CONST)
7645 return (GET_CODE (op) == PLUS
7646 && function_label_operand (XEXP (op, 0), Pmode)
7647 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7650 /* Output assembly code for a thunk to FUNCTION. */
7653 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7654 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7657 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7658 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7659 int val_14 = VAL_14_BITS_P (delta);
7661 static unsigned int current_thunk_number;
7664 ASM_OUTPUT_LABEL (file, tname);
7665 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7667 fname = (*targetm.strip_name_encoding) (fname);
7668 tname = (*targetm.strip_name_encoding) (tname);
7670 /* Output the thunk. We know that the function is in the same
7671 translation unit (i.e., the same space) as the thunk, and that
7672 thunks are output after their method. Thus, we don't need an
7673 external branch to reach the function. With SOM and GAS,
7674 functions and thunks are effectively in different sections.
7675 Thus, we can always use a IA-relative branch and the linker
7676 will add a long branch stub if necessary.
7678 However, we have to be careful when generating PIC code on the
7679 SOM port to ensure that the sequence does not transfer to an
7680 import stub for the target function as this could clobber the
7681 return value saved at SP-24. This would also apply to the
7682 32-bit linux port if the multi-space model is implemented. */
7683 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7684 && !(flag_pic && TREE_PUBLIC (function))
7685 && (TARGET_GAS || last_address < 262132))
7686 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7687 && ((targetm.have_named_sections
7688 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7689 /* The GNU 64-bit linker has rather poor stub management.
7690 So, we use a long branch from thunks that aren't in
7691 the same section as the target function. */
7693 && (DECL_SECTION_NAME (thunk_fndecl)
7694 != DECL_SECTION_NAME (function)))
7695 || ((DECL_SECTION_NAME (thunk_fndecl)
7696 == DECL_SECTION_NAME (function))
7697 && last_address < 262132)))
7698 || (!targetm.have_named_sections && last_address < 262132))))
7702 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7703 "(%%r26),%%r26\n", fname, delta);
7708 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7710 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7711 "(%%r1),%%r26\n", fname, delta);
7715 else if (TARGET_64BIT)
7717 /* We only have one call-clobbered scratch register, so we can't
7718 make use of the delay slot if delta doesn't fit in 14 bits. */
7720 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7721 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7722 "(%%r1),%%r26\n", delta, delta);
7724 fprintf (file, "\tb,l .+8,%%r1\n");
7728 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7729 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7733 int off = val_14 ? 8 : 16;
7734 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7735 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7740 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7741 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7746 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7750 else if (TARGET_PORTABLE_RUNTIME)
7752 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7753 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7757 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7758 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7763 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7765 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7766 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7770 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7772 /* The function is accessible from outside this module. The only
7773 way to avoid an import stub between the thunk and function is to
7774 call the function directly with an indirect sequence similar to
7775 that used by $$dyncall. This is possible because $$dyncall acts
7776 as the import stub in an indirect call. */
7779 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7780 lab = (*targetm.strip_name_encoding) (label);
7782 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7783 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7784 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7785 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7786 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7787 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7788 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7791 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7797 fprintf (file, "\tbve (%%r22)\n\tldo ");
7802 if (TARGET_NO_SPACE_REGS)
7804 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7809 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7810 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7811 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7817 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7819 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7824 fprintf (file, "\tb,l .+8,%%r1\n");
7826 fprintf (file, "\tbl .+8,%%r1\n");
7828 if (TARGET_SOM || !TARGET_GAS)
7830 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7831 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7835 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7836 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7841 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7842 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7847 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7849 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7850 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7857 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7859 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7860 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7864 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7869 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7874 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7876 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7879 fprintf (file, "\t.align 4\n");
7880 ASM_OUTPUT_LABEL (file, label);
7881 fprintf (file, "\t.word P'%s\n", fname);
7882 function_section (thunk_fndecl);
7885 current_thunk_number++;
7886 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7887 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7888 last_address += nbytes;
7889 update_total_code_bytes (nbytes);
7892 /* Only direct calls to static functions are allowed to be sibling (tail)
7895 This restriction is necessary because some linker generated stubs will
7896 store return pointers into rp' in some cases which might clobber a
7897 live value already in rp'.
7899 In a sibcall the current function and the target function share stack
7900 space. Thus if the path to the current function and the path to the
7901 target function save a value in rp', they save the value into the
7902 same stack slot, which has undesirable consequences.
7904 Because of the deferred binding nature of shared libraries any function
7905 with external scope could be in a different load module and thus require
7906 rp' to be saved when calling that function. So sibcall optimizations
7907 can only be safe for static function.
7909 Note that GCC never needs return value relocations, so we don't have to
7910 worry about static calls with return value relocations (which require
7913 It is safe to perform a sibcall optimization when the target function
7914 will never return. */
7916 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7918 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7919 single subspace mode and the call is not indirect. As far as I know,
7920 there is no operating system support for the multiple subspace mode.
7921 It might be possible to support indirect calls if we didn't use
7922 $$dyncall (see the indirect sequence generated in output_call). */
7924 return (decl != NULL_TREE);
7926 /* Sibcalls are not ok because the arg pointer register is not a fixed
7927 register. This prevents the sibcall optimization from occurring. In
7928 addition, there are problems with stub placement using GNU ld. This
7929 is because a normal sibcall branch uses a 17-bit relocation while
7930 a regular call branch uses a 22-bit relocation. As a result, more
7931 care needs to be taken in the placement of long-branch stubs. */
7936 && !TARGET_PORTABLE_RUNTIME
7937 && !TREE_PUBLIC (decl));
7940 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7941 use in fmpyadd instructions. */
7943 fmpyaddoperands (rtx *operands)
7945 enum machine_mode mode = GET_MODE (operands[0]);
7947 /* Must be a floating point mode. */
7948 if (mode != SFmode && mode != DFmode)
7951 /* All modes must be the same. */
7952 if (! (mode == GET_MODE (operands[1])
7953 && mode == GET_MODE (operands[2])
7954 && mode == GET_MODE (operands[3])
7955 && mode == GET_MODE (operands[4])
7956 && mode == GET_MODE (operands[5])))
7959 /* All operands must be registers. */
7960 if (! (GET_CODE (operands[1]) == REG
7961 && GET_CODE (operands[2]) == REG
7962 && GET_CODE (operands[3]) == REG
7963 && GET_CODE (operands[4]) == REG
7964 && GET_CODE (operands[5]) == REG))
7967 /* Only 2 real operands to the addition. One of the input operands must
7968 be the same as the output operand. */
7969 if (! rtx_equal_p (operands[3], operands[4])
7970 && ! rtx_equal_p (operands[3], operands[5]))
7973 /* Inout operand of add can not conflict with any operands from multiply. */
7974 if (rtx_equal_p (operands[3], operands[0])
7975 || rtx_equal_p (operands[3], operands[1])
7976 || rtx_equal_p (operands[3], operands[2]))
7979 /* multiply can not feed into addition operands. */
7980 if (rtx_equal_p (operands[4], operands[0])
7981 || rtx_equal_p (operands[5], operands[0]))
7984 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7986 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7987 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7988 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7989 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7990 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7991 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7994 /* Passed. Operands are suitable for fmpyadd. */
7998 #if !defined(USE_COLLECT2)
8000 pa_asm_out_constructor (rtx symbol, int priority)
8002 if (!function_label_operand (symbol, VOIDmode))
8003 hppa_encode_label (symbol);
8005 #ifdef CTORS_SECTION_ASM_OP
8006 default_ctor_section_asm_out_constructor (symbol, priority);
8008 # ifdef TARGET_ASM_NAMED_SECTION
8009 default_named_section_asm_out_constructor (symbol, priority);
8011 default_stabs_asm_out_constructor (symbol, priority);
8017 pa_asm_out_destructor (rtx symbol, int priority)
8019 if (!function_label_operand (symbol, VOIDmode))
8020 hppa_encode_label (symbol);
8022 #ifdef DTORS_SECTION_ASM_OP
8023 default_dtor_section_asm_out_destructor (symbol, priority);
8025 # ifdef TARGET_ASM_NAMED_SECTION
8026 default_named_section_asm_out_destructor (symbol, priority);
8028 default_stabs_asm_out_destructor (symbol, priority);
8034 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8035 use in fmpysub instructions. */
8037 fmpysuboperands (rtx *operands)
8039 enum machine_mode mode = GET_MODE (operands[0]);
8041 /* Must be a floating point mode. */
8042 if (mode != SFmode && mode != DFmode)
8045 /* All modes must be the same. */
8046 if (! (mode == GET_MODE (operands[1])
8047 && mode == GET_MODE (operands[2])
8048 && mode == GET_MODE (operands[3])
8049 && mode == GET_MODE (operands[4])
8050 && mode == GET_MODE (operands[5])))
8053 /* All operands must be registers. */
8054 if (! (GET_CODE (operands[1]) == REG
8055 && GET_CODE (operands[2]) == REG
8056 && GET_CODE (operands[3]) == REG
8057 && GET_CODE (operands[4]) == REG
8058 && GET_CODE (operands[5]) == REG))
8061 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8062 operation, so operands[4] must be the same as operand[3]. */
8063 if (! rtx_equal_p (operands[3], operands[4]))
8066 /* multiply can not feed into subtraction. */
8067 if (rtx_equal_p (operands[5], operands[0]))
8070 /* Inout operand of sub can not conflict with any operands from multiply. */
8071 if (rtx_equal_p (operands[3], operands[0])
8072 || rtx_equal_p (operands[3], operands[1])
8073 || rtx_equal_p (operands[3], operands[2]))
8076 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8078 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8079 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8080 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8081 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8082 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8083 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8086 /* Passed. Operands are suitable for fmpysub. */
8091 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8093 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
8094 || GET_CODE (op) == IOR);
8097 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8098 constants for shadd instructions. */
8100 shadd_constant_p (int val)
8102 if (val == 2 || val == 4 || val == 8)
8108 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
8109 the valid constant for shadd instructions. */
8111 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8113 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
8116 /* Return 1 if OP is valid as a base or index register in a
8120 borx_reg_operand (rtx op, enum machine_mode mode)
8122 if (GET_CODE (op) != REG)
8125 /* We must reject virtual registers as the only expressions that
8126 can be instantiated are REG and REG+CONST. */
8127 if (op == virtual_incoming_args_rtx
8128 || op == virtual_stack_vars_rtx
8129 || op == virtual_stack_dynamic_rtx
8130 || op == virtual_outgoing_args_rtx
8131 || op == virtual_cfa_rtx)
8134 /* While it's always safe to index off the frame pointer, it's not
8135 profitable to do so when the frame pointer is being eliminated. */
8136 if (!reload_completed
8137 && flag_omit_frame_pointer
8138 && !current_function_calls_alloca
8139 && op == frame_pointer_rtx)
8142 return register_operand (op, mode);
8145 /* Return 1 if this operand is anything other than a hard register. */
8148 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8150 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8153 /* Return 1 if INSN branches forward. Should be using insn_addresses
8154 to avoid walking through all the insns... */
8156 forward_branch_p (rtx insn)
8158 rtx label = JUMP_LABEL (insn);
8165 insn = NEXT_INSN (insn);
8168 return (insn == label);
8171 /* Return 1 if OP is an equality comparison, else return 0. */
8173 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8175 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8178 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
8180 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8182 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
8183 || GET_CODE (op) == LT || GET_CODE (op) == GE);
8186 /* Return 1 if INSN is in the delay slot of a call instruction. */
8188 jump_in_call_delay (rtx insn)
8191 if (GET_CODE (insn) != JUMP_INSN)
8194 if (PREV_INSN (insn)
8195 && PREV_INSN (PREV_INSN (insn))
8196 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8198 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8200 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8201 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8208 /* Output an unconditional move and branch insn. */
8211 output_parallel_movb (rtx *operands, int length)
8213 /* These are the cases in which we win. */
8215 return "mov%I1b,tr %1,%0,%2";
8217 /* None of these cases wins, but they don't lose either. */
8218 if (dbr_sequence_length () == 0)
8220 /* Nothing in the delay slot, fake it by putting the combined
8221 insn (the copy or add) in the delay slot of a bl. */
8222 if (GET_CODE (operands[1]) == CONST_INT)
8223 return "b %2\n\tldi %1,%0";
8225 return "b %2\n\tcopy %1,%0";
8229 /* Something in the delay slot, but we've got a long branch. */
8230 if (GET_CODE (operands[1]) == CONST_INT)
8231 return "ldi %1,%0\n\tb %2";
8233 return "copy %1,%0\n\tb %2";
8237 /* Output an unconditional add and branch insn. */
8240 output_parallel_addb (rtx *operands, int length)
8242 /* To make life easy we want operand0 to be the shared input/output
8243 operand and operand1 to be the readonly operand. */
8244 if (operands[0] == operands[1])
8245 operands[1] = operands[2];
8247 /* These are the cases in which we win. */
8249 return "add%I1b,tr %1,%0,%3";
8251 /* None of these cases win, but they don't lose either. */
8252 if (dbr_sequence_length () == 0)
8254 /* Nothing in the delay slot, fake it by putting the combined
8255 insn (the copy or add) in the delay slot of a bl. */
8256 return "b %3\n\tadd%I1 %1,%0,%0";
8260 /* Something in the delay slot, but we've got a long branch. */
8261 return "add%I1 %1,%0,%0\n\tb %3";
8265 /* Return nonzero if INSN (a jump insn) immediately follows a call
8266 to a named function. This is used to avoid filling the delay slot
8267 of the jump since it can usually be eliminated by modifying RP in
8268 the delay slot of the call. */
8271 following_call (rtx insn)
8273 if (! TARGET_JUMP_IN_DELAY)
8276 /* Find the previous real insn, skipping NOTEs. */
8277 insn = PREV_INSN (insn);
8278 while (insn && GET_CODE (insn) == NOTE)
8279 insn = PREV_INSN (insn);
8281 /* Check for CALL_INSNs and millicode calls. */
8283 && ((GET_CODE (insn) == CALL_INSN
8284 && get_attr_type (insn) != TYPE_DYNCALL)
8285 || (GET_CODE (insn) == INSN
8286 && GET_CODE (PATTERN (insn)) != SEQUENCE
8287 && GET_CODE (PATTERN (insn)) != USE
8288 && GET_CODE (PATTERN (insn)) != CLOBBER
8289 && get_attr_type (insn) == TYPE_MILLI)))
8295 /* We use this hook to perform a PA specific optimization which is difficult
8296 to do in earlier passes.
8298 We want the delay slots of branches within jump tables to be filled.
8299 None of the compiler passes at the moment even has the notion that a
8300 PA jump table doesn't contain addresses, but instead contains actual
8303 Because we actually jump into the table, the addresses of each entry
8304 must stay constant in relation to the beginning of the table (which
8305 itself must stay constant relative to the instruction to jump into
8306 it). I don't believe we can guarantee earlier passes of the compiler
8307 will adhere to those rules.
8309 So, late in the compilation process we find all the jump tables, and
8310 expand them into real code -- eg each entry in the jump table vector
8311 will get an appropriate label followed by a jump to the final target.
8313 Reorg and the final jump pass can then optimize these branches and
8314 fill their delay slots. We end up with smaller, more efficient code.
8316 The jump instructions within the table are special; we must be able
8317 to identify them during assembly output (if the jumps don't get filled
8318 we need to emit a nop rather than nullifying the delay slot)). We
8319 identify jumps in switch tables by using insns with the attribute
8320 type TYPE_BTABLE_BRANCH.
8322 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8323 insns. This serves two purposes, first it prevents jump.c from
8324 noticing that the last N entries in the table jump to the instruction
8325 immediately after the table and deleting the jumps. Second, those
8326 insns mark where we should emit .begin_brtab and .end_brtab directives
8327 when using GAS (allows for better link time optimizations). */
8334 remove_useless_addtr_insns (1);
8336 if (pa_cpu < PROCESSOR_8000)
8337 pa_combine_instructions ();
8340 /* This is fairly cheap, so always run it if optimizing. */
8341 if (optimize > 0 && !TARGET_BIG_SWITCH)
8343 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8344 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8346 rtx pattern, tmp, location, label;
8347 unsigned int length, i;
8349 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8350 if (GET_CODE (insn) != JUMP_INSN
8351 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8352 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8355 /* Emit marker for the beginning of the branch table. */
8356 emit_insn_before (gen_begin_brtab (), insn);
8358 pattern = PATTERN (insn);
8359 location = PREV_INSN (insn);
8360 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8362 for (i = 0; i < length; i++)
8364 /* Emit a label before each jump to keep jump.c from
8365 removing this code. */
8366 tmp = gen_label_rtx ();
8367 LABEL_NUSES (tmp) = 1;
8368 emit_label_after (tmp, location);
8369 location = NEXT_INSN (location);
8371 if (GET_CODE (pattern) == ADDR_VEC)
8372 label = XEXP (XVECEXP (pattern, 0, i), 0);
8374 label = XEXP (XVECEXP (pattern, 1, i), 0);
8376 tmp = gen_short_jump (label);
8378 /* Emit the jump itself. */
8379 tmp = emit_jump_insn_after (tmp, location);
8380 JUMP_LABEL (tmp) = label;
8381 LABEL_NUSES (label)++;
8382 location = NEXT_INSN (location);
8384 /* Emit a BARRIER after the jump. */
8385 emit_barrier_after (location);
8386 location = NEXT_INSN (location);
8389 /* Emit marker for the end of the branch table. */
8390 emit_insn_before (gen_end_brtab (), location);
8391 location = NEXT_INSN (location);
8392 emit_barrier_after (location);
8394 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8400 /* Still need brtab marker insns. FIXME: the presence of these
8401 markers disables output of the branch table to readonly memory,
8402 and any alignment directives that might be needed. Possibly,
8403 the begin_brtab insn should be output before the label for the
8404 table. This doesn matter at the moment since the tables are
8405 always output in the text section. */
8406 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8408 /* Find an ADDR_VEC insn. */
8409 if (GET_CODE (insn) != JUMP_INSN
8410 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8411 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8414 /* Now generate markers for the beginning and end of the
8416 emit_insn_before (gen_begin_brtab (), insn);
8417 emit_insn_after (gen_end_brtab (), insn);
8422 /* The PA has a number of odd instructions which can perform multiple
8423 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8424 it may be profitable to combine two instructions into one instruction
8425 with two outputs. It's not profitable PA2.0 machines because the
8426 two outputs would take two slots in the reorder buffers.
8428 This routine finds instructions which can be combined and combines
8429 them. We only support some of the potential combinations, and we
8430 only try common ways to find suitable instructions.
8432 * addb can add two registers or a register and a small integer
8433 and jump to a nearby (+-8k) location. Normally the jump to the
8434 nearby location is conditional on the result of the add, but by
8435 using the "true" condition we can make the jump unconditional.
8436 Thus addb can perform two independent operations in one insn.
8438 * movb is similar to addb in that it can perform a reg->reg
8439 or small immediate->reg copy and jump to a nearby (+-8k location).
8441 * fmpyadd and fmpysub can perform a FP multiply and either an
8442 FP add or FP sub if the operands of the multiply and add/sub are
8443 independent (there are other minor restrictions). Note both
8444 the fmpy and fadd/fsub can in theory move to better spots according
8445 to data dependencies, but for now we require the fmpy stay at a
8448 * Many of the memory operations can perform pre & post updates
8449 of index registers. GCC's pre/post increment/decrement addressing
8450 is far too simple to take advantage of all the possibilities. This
8451 pass may not be suitable since those insns may not be independent.
8453 * comclr can compare two ints or an int and a register, nullify
8454 the following instruction and zero some other register. This
8455 is more difficult to use as it's harder to find an insn which
8456 will generate a comclr than finding something like an unconditional
8457 branch. (conditional moves & long branches create comclr insns).
8459 * Most arithmetic operations can conditionally skip the next
8460 instruction. They can be viewed as "perform this operation
8461 and conditionally jump to this nearby location" (where nearby
8462 is an insns away). These are difficult to use due to the
8463 branch length restrictions. */
8466 pa_combine_instructions (void)
8470 /* This can get expensive since the basic algorithm is on the
8471 order of O(n^2) (or worse). Only do it for -O2 or higher
8472 levels of optimization. */
8476 /* Walk down the list of insns looking for "anchor" insns which
8477 may be combined with "floating" insns. As the name implies,
8478 "anchor" instructions don't move, while "floating" insns may
8480 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8481 new = make_insn_raw (new);
8483 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8485 enum attr_pa_combine_type anchor_attr;
8486 enum attr_pa_combine_type floater_attr;
8488 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8489 Also ignore any special USE insns. */
8490 if ((GET_CODE (anchor) != INSN
8491 && GET_CODE (anchor) != JUMP_INSN
8492 && GET_CODE (anchor) != CALL_INSN)
8493 || GET_CODE (PATTERN (anchor)) == USE
8494 || GET_CODE (PATTERN (anchor)) == CLOBBER
8495 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8496 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8499 anchor_attr = get_attr_pa_combine_type (anchor);
8500 /* See if anchor is an insn suitable for combination. */
8501 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8502 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8503 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8504 && ! forward_branch_p (anchor)))
8508 for (floater = PREV_INSN (anchor);
8510 floater = PREV_INSN (floater))
8512 if (GET_CODE (floater) == NOTE
8513 || (GET_CODE (floater) == INSN
8514 && (GET_CODE (PATTERN (floater)) == USE
8515 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8518 /* Anything except a regular INSN will stop our search. */
8519 if (GET_CODE (floater) != INSN
8520 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8521 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8527 /* See if FLOATER is suitable for combination with the
8529 floater_attr = get_attr_pa_combine_type (floater);
8530 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8531 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8532 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8533 && floater_attr == PA_COMBINE_TYPE_FMPY))
8535 /* If ANCHOR and FLOATER can be combined, then we're
8536 done with this pass. */
8537 if (pa_can_combine_p (new, anchor, floater, 0,
8538 SET_DEST (PATTERN (floater)),
8539 XEXP (SET_SRC (PATTERN (floater)), 0),
8540 XEXP (SET_SRC (PATTERN (floater)), 1)))
8544 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8545 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8547 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8549 if (pa_can_combine_p (new, anchor, floater, 0,
8550 SET_DEST (PATTERN (floater)),
8551 XEXP (SET_SRC (PATTERN (floater)), 0),
8552 XEXP (SET_SRC (PATTERN (floater)), 1)))
8557 if (pa_can_combine_p (new, anchor, floater, 0,
8558 SET_DEST (PATTERN (floater)),
8559 SET_SRC (PATTERN (floater)),
8560 SET_SRC (PATTERN (floater))))
8566 /* If we didn't find anything on the backwards scan try forwards. */
8568 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8569 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8571 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8573 if (GET_CODE (floater) == NOTE
8574 || (GET_CODE (floater) == INSN
8575 && (GET_CODE (PATTERN (floater)) == USE
8576 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8580 /* Anything except a regular INSN will stop our search. */
8581 if (GET_CODE (floater) != INSN
8582 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8583 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8589 /* See if FLOATER is suitable for combination with the
8591 floater_attr = get_attr_pa_combine_type (floater);
8592 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8593 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8594 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8595 && floater_attr == PA_COMBINE_TYPE_FMPY))
8597 /* If ANCHOR and FLOATER can be combined, then we're
8598 done with this pass. */
8599 if (pa_can_combine_p (new, anchor, floater, 1,
8600 SET_DEST (PATTERN (floater)),
8601 XEXP (SET_SRC (PATTERN (floater)),
8603 XEXP (SET_SRC (PATTERN (floater)),
8610 /* FLOATER will be nonzero if we found a suitable floating
8611 insn for combination with ANCHOR. */
8613 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8614 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8616 /* Emit the new instruction and delete the old anchor. */
8617 emit_insn_before (gen_rtx_PARALLEL
8619 gen_rtvec (2, PATTERN (anchor),
8620 PATTERN (floater))),
8623 PUT_CODE (anchor, NOTE);
8624 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8625 NOTE_SOURCE_FILE (anchor) = 0;
8627 /* Emit a special USE insn for FLOATER, then delete
8628 the floating insn. */
8629 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8630 delete_insn (floater);
8635 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8638 /* Emit the new_jump instruction and delete the old anchor. */
8640 = emit_jump_insn_before (gen_rtx_PARALLEL
8642 gen_rtvec (2, PATTERN (anchor),
8643 PATTERN (floater))),
8646 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8647 PUT_CODE (anchor, NOTE);
8648 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8649 NOTE_SOURCE_FILE (anchor) = 0;
8651 /* Emit a special USE insn for FLOATER, then delete
8652 the floating insn. */
8653 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8654 delete_insn (floater);
8662 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8665 int insn_code_number;
8668 /* Create a PARALLEL with the patterns of ANCHOR and
8669 FLOATER, try to recognize it, then test constraints
8670 for the resulting pattern.
8672 If the pattern doesn't match or the constraints
8673 aren't met keep searching for a suitable floater
8675 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8676 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8677 INSN_CODE (new) = -1;
8678 insn_code_number = recog_memoized (new);
8679 if (insn_code_number < 0
8680 || (extract_insn (new), ! constrain_operands (1)))
8694 /* There's up to three operands to consider. One
8695 output and two inputs.
8697 The output must not be used between FLOATER & ANCHOR
8698 exclusive. The inputs must not be set between
8699 FLOATER and ANCHOR exclusive. */
8701 if (reg_used_between_p (dest, start, end))
8704 if (reg_set_between_p (src1, start, end))
8707 if (reg_set_between_p (src2, start, end))
8710 /* If we get here, then everything is good. */
8714 /* Return nonzero if references for INSN are delayed.
8716 Millicode insns are actually function calls with some special
8717 constraints on arguments and register usage.
8719 Millicode calls always expect their arguments in the integer argument
8720 registers, and always return their result in %r29 (ret1). They
8721 are expected to clobber their arguments, %r1, %r29, and the return
8722 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8724 This function tells reorg that the references to arguments and
8725 millicode calls do not appear to happen until after the millicode call.
8726 This allows reorg to put insns which set the argument registers into the
8727 delay slot of the millicode call -- thus they act more like traditional
8730 Note we can not consider side effects of the insn to be delayed because
8731 the branch and link insn will clobber the return pointer. If we happened
8732 to use the return pointer in the delay slot of the call, then we lose.
8734 get_attr_type will try to recognize the given insn, so make sure to
8735 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8738 insn_refs_are_delayed (rtx insn)
8740 return ((GET_CODE (insn) == INSN
8741 && GET_CODE (PATTERN (insn)) != SEQUENCE
8742 && GET_CODE (PATTERN (insn)) != USE
8743 && GET_CODE (PATTERN (insn)) != CLOBBER
8744 && get_attr_type (insn) == TYPE_MILLI));
8747 /* On the HP-PA the value is found in register(s) 28(-29), unless
8748 the mode is SF or DF. Then the value is returned in fr4 (32).
8750 This must perform the same promotions as PROMOTE_MODE, else
8751 PROMOTE_FUNCTION_RETURN will not work correctly.
8753 Small structures must be returned in a PARALLEL on PA64 in order
8754 to match the HP Compiler ABI. */
8757 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8759 enum machine_mode valmode;
8761 /* Aggregates with a size less than or equal to 128 bits are returned
8762 in GR 28(-29). They are left justified. The pad bits are undefined.
8763 Larger aggregates are returned in memory. */
8764 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8768 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8770 for (i = 0; i < ub; i++)
8772 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8773 gen_rtx_REG (DImode, 28 + i),
8778 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8781 if ((INTEGRAL_TYPE_P (valtype)
8782 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8783 || POINTER_TYPE_P (valtype))
8784 valmode = word_mode;
8786 valmode = TYPE_MODE (valtype);
8788 if (TREE_CODE (valtype) == REAL_TYPE
8789 && TYPE_MODE (valtype) != TFmode
8790 && !TARGET_SOFT_FLOAT)
8791 return gen_rtx_REG (valmode, 32);
8793 return gen_rtx_REG (valmode, 28);
8796 /* Return the location of a parameter that is passed in a register or NULL
8797 if the parameter has any component that is passed in memory.
8799 This is new code and will be pushed to into the net sources after
8802 ??? We might want to restructure this so that it looks more like other
8805 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8806 int named ATTRIBUTE_UNUSED)
8808 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8815 if (mode == VOIDmode)
8818 arg_size = FUNCTION_ARG_SIZE (mode, type);
8820 /* If this arg would be passed partially or totally on the stack, then
8821 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8822 handle arguments which are split between regs and stack slots if
8823 the ABI mandates split arguments. */
8826 /* The 32-bit ABI does not split arguments. */
8827 if (cum->words + arg_size > max_arg_words)
8833 alignment = cum->words & 1;
8834 if (cum->words + alignment >= max_arg_words)
8838 /* The 32bit ABIs and the 64bit ABIs are rather different,
8839 particularly in their handling of FP registers. We might
8840 be able to cleverly share code between them, but I'm not
8841 going to bother in the hope that splitting them up results
8842 in code that is more easily understood. */
8846 /* Advance the base registers to their current locations.
8848 Remember, gprs grow towards smaller register numbers while
8849 fprs grow to higher register numbers. Also remember that
8850 although FP regs are 32-bit addressable, we pretend that
8851 the registers are 64-bits wide. */
8852 gpr_reg_base = 26 - cum->words;
8853 fpr_reg_base = 32 + cum->words;
8855 /* Arguments wider than one word and small aggregates need special
8859 || (type && AGGREGATE_TYPE_P (type)))
8861 /* Double-extended precision (80-bit), quad-precision (128-bit)
8862 and aggregates including complex numbers are aligned on
8863 128-bit boundaries. The first eight 64-bit argument slots
8864 are associated one-to-one, with general registers r26
8865 through r19, and also with floating-point registers fr4
8866 through fr11. Arguments larger than one word are always
8867 passed in general registers.
8869 Using a PARALLEL with a word mode register results in left
8870 justified data on a big-endian target. */
8873 int i, offset = 0, ub = arg_size;
8875 /* Align the base register. */
8876 gpr_reg_base -= alignment;
8878 ub = MIN (ub, max_arg_words - cum->words - alignment);
8879 for (i = 0; i < ub; i++)
8881 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8882 gen_rtx_REG (DImode, gpr_reg_base),
8888 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8893 /* If the argument is larger than a word, then we know precisely
8894 which registers we must use. */
8908 /* Structures 5 to 8 bytes in size are passed in the general
8909 registers in the same manner as other non floating-point
8910 objects. The data is right-justified and zero-extended
8913 This is magic. Normally, using a PARALLEL results in left
8914 justified data on a big-endian target. However, using a
8915 single double-word register provides the required right
8916 justification for 5 to 8 byte structures. This has nothing
8917 to do with the direction of padding specified for the argument.
8918 It has to do with how the data is widened and shifted into
8919 and from the register.
8921 Aside from adding load_multiple and store_multiple patterns,
8922 this is the only way that I have found to obtain right
8923 justification of BLKmode data when it has a size greater
8924 than one word. Splitting the operation into two SImode loads
8925 or returning a DImode REG results in left justified data. */
8926 if (mode == BLKmode)
8928 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8929 gen_rtx_REG (DImode, gpr_reg_base),
8931 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8936 /* We have a single word (32 bits). A simple computation
8937 will get us the register #s we need. */
8938 gpr_reg_base = 26 - cum->words;
8939 fpr_reg_base = 32 + 2 * cum->words;
8943 /* Determine if the argument needs to be passed in both general and
8944 floating point registers. */
8945 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8946 /* If we are doing soft-float with portable runtime, then there
8947 is no need to worry about FP regs. */
8948 && !TARGET_SOFT_FLOAT
8949 /* The parameter must be some kind of float, else we can just
8950 pass it in integer registers. */
8951 && FLOAT_MODE_P (mode)
8952 /* The target function must not have a prototype. */
8953 && cum->nargs_prototype <= 0
8954 /* libcalls do not need to pass items in both FP and general
8956 && type != NULL_TREE
8957 /* All this hair applies to "outgoing" args only. This includes
8958 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8960 /* Also pass outgoing floating arguments in both registers in indirect
8961 calls with the 32 bit ABI and the HP assembler since there is no
8962 way to the specify argument locations in static functions. */
8967 && FLOAT_MODE_P (mode)))
8973 gen_rtx_EXPR_LIST (VOIDmode,
8974 gen_rtx_REG (mode, fpr_reg_base),
8976 gen_rtx_EXPR_LIST (VOIDmode,
8977 gen_rtx_REG (mode, gpr_reg_base),
8982 /* See if we should pass this parameter in a general register. */
8983 if (TARGET_SOFT_FLOAT
8984 /* Indirect calls in the normal 32bit ABI require all arguments
8985 to be passed in general registers. */
8986 || (!TARGET_PORTABLE_RUNTIME
8990 /* If the parameter is not a floating point parameter, then
8991 it belongs in GPRs. */
8992 || !FLOAT_MODE_P (mode))
8993 retval = gen_rtx_REG (mode, gpr_reg_base);
8995 retval = gen_rtx_REG (mode, fpr_reg_base);
9001 /* If this arg would be passed totally in registers or totally on the stack,
9002 then this routine should return zero. It is currently called only for
9003 the 64-bit target. */
9005 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9006 tree type, int named ATTRIBUTE_UNUSED)
9008 unsigned int max_arg_words = 8;
9009 unsigned int offset = 0;
9011 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9014 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9015 /* Arg fits fully into registers. */
9017 else if (cum->words + offset >= max_arg_words)
9018 /* Arg fully on the stack. */
9022 return max_arg_words - cum->words - offset;
9026 /* Return 1 if this is a comparison operator. This allows the use of
9027 MATCH_OPERATOR to recognize all the branch insns. */
9030 cmpib_comparison_operator (rtx op, enum machine_mode mode)
9032 return ((mode == VOIDmode || GET_MODE (op) == mode)
9033 && (GET_CODE (op) == EQ
9034 || GET_CODE (op) == NE
9035 || GET_CODE (op) == GT
9036 || GET_CODE (op) == GTU
9037 || GET_CODE (op) == GE
9038 || GET_CODE (op) == LT
9039 || GET_CODE (op) == LE
9040 || GET_CODE (op) == LEU));
9043 /* On hpux10, the linker will give an error if we have a reference
9044 in the read-only data section to a symbol defined in a shared
9045 library. Therefore, expressions that might require a reloc can
9046 not be placed in the read-only data section. */
9049 pa_select_section (tree exp, int reloc,
9050 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9052 if (TREE_CODE (exp) == VAR_DECL
9053 && TREE_READONLY (exp)
9054 && !TREE_THIS_VOLATILE (exp)
9055 && DECL_INITIAL (exp)
9056 && (DECL_INITIAL (exp) == error_mark_node
9057 || TREE_CONSTANT (DECL_INITIAL (exp)))
9059 readonly_data_section ();
9060 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
9061 && !(TREE_CODE (exp) == STRING_CST && flag_writable_strings)
9063 readonly_data_section ();
9069 pa_globalize_label (FILE *stream, const char *name)
9071 /* We only handle DATA objects here, functions are globalized in
9072 ASM_DECLARE_FUNCTION_NAME. */
9073 if (! FUNCTION_NAME_P (name))
9075 fputs ("\t.EXPORT ", stream);
9076 assemble_name (stream, name);
9077 fputs (",DATA\n", stream);