1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
static int hppa_use_dfa_pipeline_interface (void);

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface

/* Return nonzero to tell the scheduler that this target provides a
   DFA-based pipeline description.  */
static int
hppa_use_dfa_pipeline_interface (void)
{
  return 1;
}
62 /* Return nonzero if there is a bypass for the output of
63 OUT_INSN and the fp store IN_INSN. */
65 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
67 enum machine_mode store_mode;
68 enum machine_mode other_mode;
71 if (recog_memoized (in_insn) < 0
72 || get_attr_type (in_insn) != TYPE_FPSTORE
73 || recog_memoized (out_insn) < 0)
76 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
78 set = single_set (out_insn);
82 other_mode = GET_MODE (SET_SRC (set));
84 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
/* Emit frame notes only when the target can describe its return
   address column to the unwinder.  */
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
96 static void copy_reg_pointer (rtx, rtx);
97 static int hppa_address_cost (rtx);
98 static bool hppa_rtx_costs (rtx, int, int, int *);
99 static inline rtx force_mode (enum machine_mode, rtx);
100 static void pa_reorg (void);
101 static void pa_combine_instructions (void);
102 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
103 static int forward_branch_p (rtx);
104 static int shadd_constant_p (int);
105 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
106 static int compute_movstr_length (rtx);
107 static int compute_clrstr_length (rtx);
108 static bool pa_assemble_integer (rtx, unsigned int, int);
109 static void remove_useless_addtr_insns (int);
110 static void store_reg (int, HOST_WIDE_INT, int);
111 static void store_reg_modify (int, int, HOST_WIDE_INT);
112 static void load_reg (int, HOST_WIDE_INT, int);
113 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
114 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
115 static void update_total_code_bytes (int);
116 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static int pa_adjust_cost (rtx, rtx, rtx, int);
118 static int pa_adjust_priority (rtx, int);
119 static int pa_issue_rate (void);
120 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
122 static void pa_encode_section_info (tree, rtx, int);
123 static const char *pa_strip_name_encoding (const char *);
124 static bool pa_function_ok_for_sibcall (tree, tree);
125 static void pa_globalize_label (FILE *, const char *)
127 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree);
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor (rtx, int);
131 static void pa_asm_out_destructor (rtx, int);
133 static void pa_init_builtins (void);
134 static rtx hppa_builtin_saveregs (void);
135 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
136 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
137 static struct deferred_plabel *get_plabel (const char *)
139 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
140 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
141 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
142 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
143 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
144 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
145 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
147 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
148 static void output_deferred_plabels (void);
149 #ifdef HPUX_LONG_DOUBLE_LIBRARY
150 static void pa_hpux_init_libfuncs (void);
152 static rtx pa_struct_value_rtx (tree, int);
154 /* Save the operands last given to a compare for use when we
155 generate a scc or bcc insn. */
156 rtx hppa_compare_op0, hppa_compare_op1;
157 enum cmp_type hppa_branch_type;
159 /* Which cpu we are scheduling for. */
160 enum processor_type pa_cpu;
162 /* String to hold which cpu we are scheduling for. */
163 const char *pa_cpu_string;
165 /* Which architecture we are generating code for. */
166 enum architecture_type pa_arch;
168 /* String to hold which architecture we are generating code for. */
169 const char *pa_arch_string;
171 /* Counts for the number of callee-saved general and floating point
172 registers which were saved by the current function's prologue. */
173 static int gr_saved, fr_saved;
175 static rtx find_addr_reg (rtx);
177 /* Keep track of the number of bytes we have output in the CODE subspace
178 during this compilation so we'll know when to emit inline long-calls. */
179 unsigned long total_code_bytes;
181 /* The last address of the previous function plus the number of bytes in
182 associated thunks that have been output. This is used to determine if
183 a thunk can use an IA-relative branch to reach its target function. */
184 static int last_address;
186 /* Variables to handle plabels that we discover are necessary at assembly
187 output time. They are output after the current function. */
188 struct deferred_plabel GTY(())
193 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
195 static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END output_deferred_plabels

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
/* The global target hook vector, built from the TARGET_* macros
   #define'd above (TARGET_INITIALIZER comes from "target-def.h").  */
struct gcc_target targetm = TARGET_INITIALIZER;
282 override_options (void)
284 if (pa_cpu_string == NULL)
285 pa_cpu_string = TARGET_SCHED_DEFAULT;
287 if (! strcmp (pa_cpu_string, "8000"))
289 pa_cpu_string = "8000";
290 pa_cpu = PROCESSOR_8000;
292 else if (! strcmp (pa_cpu_string, "7100"))
294 pa_cpu_string = "7100";
295 pa_cpu = PROCESSOR_7100;
297 else if (! strcmp (pa_cpu_string, "700"))
299 pa_cpu_string = "700";
300 pa_cpu = PROCESSOR_700;
302 else if (! strcmp (pa_cpu_string, "7100LC"))
304 pa_cpu_string = "7100LC";
305 pa_cpu = PROCESSOR_7100LC;
307 else if (! strcmp (pa_cpu_string, "7200"))
309 pa_cpu_string = "7200";
310 pa_cpu = PROCESSOR_7200;
312 else if (! strcmp (pa_cpu_string, "7300"))
314 pa_cpu_string = "7300";
315 pa_cpu = PROCESSOR_7300;
319 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
322 /* Set the instruction set architecture. */
323 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
325 pa_arch_string = "1.0";
326 pa_arch = ARCHITECTURE_10;
327 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
329 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
331 pa_arch_string = "1.1";
332 pa_arch = ARCHITECTURE_11;
333 target_flags &= ~MASK_PA_20;
334 target_flags |= MASK_PA_11;
336 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
338 pa_arch_string = "2.0";
339 pa_arch = ARCHITECTURE_20;
340 target_flags |= MASK_PA_11 | MASK_PA_20;
342 else if (pa_arch_string)
344 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
347 /* Unconditional branches in the delay slot are not compatible with dwarf2
348 call frame information. There is no benefit in using this optimization
349 on PA8000 and later processors. */
350 if (pa_cpu >= PROCESSOR_8000
351 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
352 || flag_unwind_tables)
353 target_flags &= ~MASK_JUMP_IN_DELAY;
355 if (flag_pic && TARGET_PORTABLE_RUNTIME)
357 warning ("PIC code generation is not supported in the portable runtime model\n");
360 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
362 warning ("PIC code generation is not compatible with fast indirect calls\n");
365 if (! TARGET_GAS && write_symbols != NO_DEBUG)
367 warning ("-g is only supported when using GAS on this processor,");
368 warning ("-g option disabled");
369 write_symbols = NO_DEBUG;
372 /* We only support the "big PIC" model now. And we always generate PIC
373 code when in 64bit mode. */
374 if (flag_pic == 1 || TARGET_64BIT)
377 /* We can't guarantee that .dword is available for 32-bit targets. */
378 if (UNITS_PER_WORD == 4)
379 targetm.asm_out.aligned_op.di = NULL;
381 /* The unaligned ops are only available when using GAS. */
384 targetm.asm_out.unaligned_op.hi = NULL;
385 targetm.asm_out.unaligned_op.si = NULL;
386 targetm.asm_out.unaligned_op.di = NULL;
/* Target hook: disable the fputc_unlocked builtin on hosts that
   lack it.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
#endif
}
399 /* If FROM is a probable pointer register, mark TO as a probable
400 pointer register with the same pointer alignment as FROM. */
403 copy_reg_pointer (rtx to, rtx from)
405 if (REG_POINTER (from))
406 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
409 /* Return nonzero only if OP is a register of mode MODE,
412 reg_or_0_operand (rtx op, enum machine_mode mode)
414 return (op == CONST0_RTX (mode) || register_operand (op, mode));
417 /* Return nonzero if OP is suitable for use in a call to a named
420 For 2.5 try to eliminate either call_operand_address or
421 function_label_operand, they perform very similar functions. */
423 call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
425 return (GET_MODE (op) == word_mode
426 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
429 /* Return 1 if X contains a symbolic expression. We know these
430 expressions will have one of a few well defined forms, so
431 we need only check those forms. */
433 symbolic_expression_p (rtx x)
436 /* Strip off any HIGH. */
437 if (GET_CODE (x) == HIGH)
440 return (symbolic_operand (x, VOIDmode));
444 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
446 switch (GET_CODE (op))
453 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
454 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
455 && GET_CODE (XEXP (op, 1)) == CONST_INT);
461 /* Return truth value of statement that OP is a symbolic memory
462 operand of mode MODE. */
465 symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
467 if (GET_CODE (op) == SUBREG)
468 op = SUBREG_REG (op);
469 if (GET_CODE (op) != MEM)
472 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
473 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
476 /* Return 1 if the operand is either a register, zero, or a memory operand
477 that is not symbolic. */
480 reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
482 if (register_operand (op, mode))
485 if (op == CONST0_RTX (mode))
488 if (GET_CODE (op) == SUBREG)
489 op = SUBREG_REG (op);
491 if (GET_CODE (op) != MEM)
494 /* Until problems with management of the REG_POINTER flag are resolved,
495 we need to delay creating move insns with unscaled indexed addresses
496 until CSE is not expected. */
497 if (!TARGET_NO_SPACE_REGS
499 && GET_CODE (XEXP (op, 0)) == PLUS
500 && REG_P (XEXP (XEXP (op, 0), 0))
501 && REG_P (XEXP (XEXP (op, 0), 1)))
504 return (!symbolic_memory_operand (op, mode)
505 && memory_address_p (mode, XEXP (op, 0)));
508 /* Return 1 if the operand is a register operand or a non-symbolic memory
509 operand after reload. This predicate is used for branch patterns that
510 internally handle register reloading. We need to accept non-symbolic
511 memory operands after reload to ensure that the pattern is still valid
512 if reload didn't find a hard register for the operand. */
515 reg_before_reload_operand (rtx op, enum machine_mode mode)
517 /* Don't accept a SUBREG since it will need a reload. */
518 if (GET_CODE (op) == SUBREG)
521 if (register_operand (op, mode))
525 && memory_operand (op, mode)
526 && !symbolic_memory_operand (op, mode))
532 /* Accept any constant that can be moved in one instruction into a
535 cint_ok_for_move (HOST_WIDE_INT intval)
537 /* OK if ldo, ldil, or zdepi, can be used. */
538 return (CONST_OK_FOR_LETTER_P (intval, 'J')
539 || CONST_OK_FOR_LETTER_P (intval, 'N')
540 || CONST_OK_FOR_LETTER_P (intval, 'K'));
543 /* Return 1 iff OP is an indexed memory operand. */
545 indexed_memory_operand (rtx op, enum machine_mode mode)
547 if (GET_MODE (op) != mode)
550 /* Before reload, a (SUBREG (MEM...)) forces reloading into a register. */
551 if (reload_completed && GET_CODE (op) == SUBREG)
552 op = SUBREG_REG (op);
554 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
559 return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
562 /* Accept anything that can be used as a destination operand for a
563 move instruction. We don't accept indexed memory operands since
564 they are supported only for floating point stores. */
566 move_dest_operand (rtx op, enum machine_mode mode)
568 if (register_operand (op, mode))
571 if (GET_MODE (op) != mode)
574 if (GET_CODE (op) == SUBREG)
575 op = SUBREG_REG (op);
577 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
582 return (memory_address_p (mode, op)
583 && !IS_INDEX_ADDR_P (op)
584 && !IS_LO_SUM_DLT_ADDR_P (op));
587 /* Accept anything that can be used as a source operand for a move
590 move_src_operand (rtx op, enum machine_mode mode)
592 if (register_operand (op, mode))
595 if (GET_CODE (op) == CONST_INT)
596 return cint_ok_for_move (INTVAL (op));
598 if (GET_MODE (op) != mode)
601 if (GET_CODE (op) == SUBREG)
602 op = SUBREG_REG (op);
604 if (GET_CODE (op) != MEM)
607 /* Until problems with management of the REG_POINTER flag are resolved,
608 we need to delay creating move insns with unscaled indexed addresses
609 until CSE is not expected. */
610 if (!TARGET_NO_SPACE_REGS
612 && GET_CODE (XEXP (op, 0)) == PLUS
613 && REG_P (XEXP (XEXP (op, 0), 0))
614 && REG_P (XEXP (XEXP (op, 0), 1)))
617 return memory_address_p (mode, XEXP (op, 0));
620 /* Accept REG and any CONST_INT that can be moved in one instruction into a
623 reg_or_cint_move_operand (rtx op, enum machine_mode mode)
625 if (register_operand (op, mode))
628 return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
632 pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
637 switch (GET_CODE (op))
643 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
644 && GET_CODE (XEXP (op, 1)) == CONST_INT);
651 fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
653 return reg_renumber && FP_REG_P (op);
658 /* Return truth value of whether OP can be used as an operand in a
659 three operand arithmetic insn that accepts registers of mode MODE
660 or 14-bit signed integers. */
662 arith_operand (rtx op, enum machine_mode mode)
664 return (register_operand (op, mode)
665 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
668 /* Return truth value of whether OP can be used as an operand in a
669 three operand arithmetic insn that accepts registers of mode MODE
670 or 11-bit signed integers. */
672 arith11_operand (rtx op, enum machine_mode mode)
674 return (register_operand (op, mode)
675 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
678 /* Return truth value of whether OP can be used as an operand in a
681 adddi3_operand (rtx op, enum machine_mode mode)
683 return (register_operand (op, mode)
684 || (GET_CODE (op) == CONST_INT
685 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
688 /* A constant integer suitable for use in a PRE_MODIFY memory
691 pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
693 return (GET_CODE (op) == CONST_INT
694 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
697 /* A constant integer suitable for use in a POST_MODIFY memory
700 post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
702 return (GET_CODE (op) == CONST_INT
703 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
707 arith_double_operand (rtx op, enum machine_mode mode)
709 return (register_operand (op, mode)
710 || (GET_CODE (op) == CONST_DOUBLE
711 && GET_MODE (op) == mode
712 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
713 && ((CONST_DOUBLE_HIGH (op) >= 0)
714 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
717 /* Return truth value of whether OP is an integer which fits the
718 range constraining immediate operands in three-address insns, or
719 is an integer register. */
722 ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
724 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
725 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
728 /* Return nonzero if OP is an integer register, else return zero. */
730 ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
732 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
735 /* Return truth value of whether OP is an integer which fits the
736 range constraining immediate operands in three-address insns. */
739 int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
741 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
745 uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
747 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
751 int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
753 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
757 uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
759 #if HOST_BITS_PER_WIDE_INT > 32
760 /* All allowed constants will fit a CONST_INT. */
761 return (GET_CODE (op) == CONST_INT
762 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
764 return (GET_CODE (op) == CONST_INT
765 || (GET_CODE (op) == CONST_DOUBLE
766 && CONST_DOUBLE_HIGH (op) == 0));
771 arith5_operand (rtx op, enum machine_mode mode)
773 return register_operand (op, mode) || int5_operand (op, mode);
776 /* True iff zdepi can be used to generate this CONST_INT.
777 zdepi first sign extends a 5 bit signed number to a given field
778 length, then places this field anywhere in a zero. */
780 zdepi_cint_p (unsigned HOST_WIDE_INT x)
782 unsigned HOST_WIDE_INT lsb_mask, t;
784 /* This might not be obvious, but it's at least fast.
785 This function is critical; we don't have the time loops would take. */
787 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
788 /* Return true iff t is a power of two. */
789 return ((t & (t - 1)) == 0);
792 /* True iff depi or extru can be used to compute (reg & mask).
793 Accept bit pattern like these:
798 and_mask_p (unsigned HOST_WIDE_INT mask)
801 mask += mask & -mask;
802 return (mask & (mask - 1)) == 0;
805 /* True iff depi or extru can be used to compute (reg & OP). */
807 and_operand (rtx op, enum machine_mode mode)
809 return (register_operand (op, mode)
810 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
813 /* True iff depi can be used to compute (reg | MASK). */
815 ior_mask_p (unsigned HOST_WIDE_INT mask)
817 mask += mask & -mask;
818 return (mask & (mask - 1)) == 0;
821 /* True iff depi can be used to compute (reg | OP). */
823 ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
825 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
829 lhs_lshift_operand (rtx op, enum machine_mode mode)
831 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
834 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
835 Such values can be the left hand side x in (x << r), using the zvdepi
838 lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
840 unsigned HOST_WIDE_INT x;
841 if (GET_CODE (op) != CONST_INT)
843 x = INTVAL (op) >> 4;
844 return (x & (x + 1)) == 0;
848 arith32_operand (rtx op, enum machine_mode mode)
850 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
854 pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
856 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

/* NOTE(review): the text of this function appears to be truncated --
   the return type line, local declarations (e.g. the "pic_ref"
   assigned below), several statements, return statements, and braces
   are missing.  Restore from a complete copy of this file; only
   comments have been added here.  */
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;

  if (GET_CODE (orig) == SYMBOL_REF)
      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      /* NOTE(review): the assignment target (presumably "pic_ref") and
	 the tail of the UNSPEC argument list were lost here.  */
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, tmp_reg,
				       gen_rtx_UNSPEC (Pmode,

      current_function_uses_pic_offset_table = 1;
      MEM_NOTRAP_P (pic_ref) = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

  else if (GET_CODE (orig) == CONST)
      /* An offset from the PIC offset table register is already
	 legitimate.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	  /* Legitimize both halves of the sum; reuse REG for the second
	     half only when it is not already holding the base.  */
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	  /* Small offsets fold directly into the address.  */
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);

      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
953 /* Try machine-dependent ways of modifying an illegitimate address
954 to be legitimate. If we find one, return the new, valid address.
955 This macro is used in only one place: `memory_address' in explow.c.
957 OLDX is the address as it was before break_out_memory_refs was called.
958 In some cases it is useful to look at this to decide what needs to be done.
960 MODE and WIN are passed so that this macro can use
961 GO_IF_LEGITIMATE_ADDRESS.
963 It is always safe for this macro to do nothing. It exists to recognize
964 opportunities to optimize the output.
966 For the PA, transform:
968 memory(X + <large int>)
972 if (<large int> & mask) >= 16
973 Y = (<large int> & ~mask) + mask + 1 Round up.
975 Y = (<large int> & ~mask) Round down.
977 memory (Z + (<large int> - Y));
979 This is for CSE to find several similar references, and only use one Z.
981 X can either be a SYMBOL_REF or REG, but because combine can not
982 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
983 D will not fit in 14 bits.
985 MODE_FLOAT references allow displacements which fit in 5 bits, so use
988 MODE_INT references allow displacements which fit in 14 bits, so use
991 This relies on the fact that most mode MODE_FLOAT references will use FP
992 registers and most mode MODE_INT references will use integer registers.
993 (In the rare case of an FP register used in an integer MODE, we depend
994 on secondary reloads to clean things up.)
997 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
998 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
999 addressing modes to be used).
1001 Put X and Z into registers. Then put the entire expression into
1005 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1006 enum machine_mode mode)
1010 /* We need to canonicalize the order of operands in unscaled indexed
1011 addresses since the code that checks if an address is valid doesn't
1012 always try both orders. */
1013 if (!TARGET_NO_SPACE_REGS
1014 && GET_CODE (x) == PLUS
1015 && GET_MODE (x) == Pmode
1016 && REG_P (XEXP (x, 0))
1017 && REG_P (XEXP (x, 1))
1018 && REG_POINTER (XEXP (x, 0))
1019 && !REG_POINTER (XEXP (x, 1)))
1020 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1023 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1025 /* Strip off CONST. */
1026 if (GET_CODE (x) == CONST)
1029 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1030 That should always be safe. */
1031 if (GET_CODE (x) == PLUS
1032 && GET_CODE (XEXP (x, 0)) == REG
1033 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1035 rtx reg = force_reg (Pmode, XEXP (x, 1));
1036 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1039 /* Note we must reject symbols which represent function addresses
1040 since the assembler/linker can't handle arithmetic on plabels. */
1041 if (GET_CODE (x) == PLUS
1042 && GET_CODE (XEXP (x, 1)) == CONST_INT
1043 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1044 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1045 || GET_CODE (XEXP (x, 0)) == REG))
1047 rtx int_part, ptr_reg;
1049 int offset = INTVAL (XEXP (x, 1));
1052 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1053 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1055 /* Choose which way to round the offset. Round up if we
1056 are >= halfway to the next boundary. */
1057 if ((offset & mask) >= ((mask + 1) / 2))
1058 newoffset = (offset & ~ mask) + mask + 1;
1060 newoffset = (offset & ~ mask);
1062 /* If the newoffset will not fit in 14 bits (ldo), then
1063 handling this would take 4 or 5 instructions (2 to load
1064 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1065 add the new offset and the SYMBOL_REF.) Combine can
1066 not handle 4->2 or 5->2 combinations, so do not create
1068 if (! VAL_14_BITS_P (newoffset)
1069 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1071 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1074 gen_rtx_HIGH (Pmode, const_part));
1077 gen_rtx_LO_SUM (Pmode,
1078 tmp_reg, const_part));
1082 if (! VAL_14_BITS_P (newoffset))
1083 int_part = force_reg (Pmode, GEN_INT (newoffset));
1085 int_part = GEN_INT (newoffset);
1087 ptr_reg = force_reg (Pmode,
1088 gen_rtx_PLUS (Pmode,
1089 force_reg (Pmode, XEXP (x, 0)),
1092 return plus_constant (ptr_reg, offset - newoffset);
1095 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1097 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1098 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1099 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1100 && (OBJECT_P (XEXP (x, 1))
1101 || GET_CODE (XEXP (x, 1)) == SUBREG)
1102 && GET_CODE (XEXP (x, 1)) != CONST)
1104 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1108 if (GET_CODE (reg1) != REG)
1109 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1111 reg2 = XEXP (XEXP (x, 0), 0);
1112 if (GET_CODE (reg2) != REG)
1113 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1115 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1116 gen_rtx_MULT (Pmode,
1122 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1124 Only do so for floating point modes since this is more speculative
1125 and we lose if it's an integer store. */
1126 if (GET_CODE (x) == PLUS
1127 && GET_CODE (XEXP (x, 0)) == PLUS
1128 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1129 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1130 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1131 && (mode == SFmode || mode == DFmode))
1134 /* First, try and figure out what to use as a base register. */
1135 rtx reg1, reg2, base, idx, orig_base;
1137 reg1 = XEXP (XEXP (x, 0), 1);
1142 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1143 then emit_move_sequence will turn on REG_POINTER so we'll know
1144 it's a base register below. */
1145 if (GET_CODE (reg1) != REG)
1146 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1148 if (GET_CODE (reg2) != REG)
1149 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1151 /* Figure out what the base and index are. */
1153 if (GET_CODE (reg1) == REG
1154 && REG_POINTER (reg1))
1157 orig_base = XEXP (XEXP (x, 0), 1);
1158 idx = gen_rtx_PLUS (Pmode,
1159 gen_rtx_MULT (Pmode,
1160 XEXP (XEXP (XEXP (x, 0), 0), 0),
1161 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1164 else if (GET_CODE (reg2) == REG
1165 && REG_POINTER (reg2))
1168 orig_base = XEXP (x, 1);
1175 /* If the index adds a large constant, try to scale the
1176 constant so that it can be loaded with only one insn. */
1177 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1178 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1179 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1180 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1182 /* Divide the CONST_INT by the scale factor, then add it to A. */
1183 int val = INTVAL (XEXP (idx, 1));
1185 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1186 reg1 = XEXP (XEXP (idx, 0), 0);
1187 if (GET_CODE (reg1) != REG)
1188 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1190 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1192 /* We can now generate a simple scaled indexed address. */
1195 (Pmode, gen_rtx_PLUS (Pmode,
1196 gen_rtx_MULT (Pmode, reg1,
1197 XEXP (XEXP (idx, 0), 1)),
1201 /* If B + C is still a valid base register, then add them. */
1202 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1203 && INTVAL (XEXP (idx, 1)) <= 4096
1204 && INTVAL (XEXP (idx, 1)) >= -4096)
1206 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1209 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1211 reg2 = XEXP (XEXP (idx, 0), 0);
1212 if (GET_CODE (reg2) != CONST_INT)
1213 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1215 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1216 gen_rtx_MULT (Pmode,
1222 /* Get the index into a register, then add the base + index and
1223 return a register holding the result. */
1225 /* First get A into a register. */
1226 reg1 = XEXP (XEXP (idx, 0), 0);
1227 if (GET_CODE (reg1) != REG)
1228 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1230 /* And get B into a register. */
1231 reg2 = XEXP (idx, 1);
1232 if (GET_CODE (reg2) != REG)
1233 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1235 reg1 = force_reg (Pmode,
1236 gen_rtx_PLUS (Pmode,
1237 gen_rtx_MULT (Pmode, reg1,
1238 XEXP (XEXP (idx, 0), 1)),
1241 /* Add the result to our base register and return. */
1242 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1246 /* Uh-oh. We might have an address for x[n-100000]. This needs
1247 special handling to avoid creating an indexed memory address
1248 with x-100000 as the base.
1250 If the constant part is small enough, then it's still safe because
1251 there is a guard page at the beginning and end of the data segment.
1253 Scaled references are common enough that we want to try and rearrange the
1254 terms so that we can use indexing for these addresses too. Only
1255 do the optimization for floatint point modes. */
1257 if (GET_CODE (x) == PLUS
1258 && symbolic_expression_p (XEXP (x, 1)))
1260 /* Ugly. We modify things here so that the address offset specified
1261 by the index expression is computed first, then added to x to form
1262 the entire address. */
1264 rtx regx1, regx2, regy1, regy2, y;
1266 /* Strip off any CONST. */
1268 if (GET_CODE (y) == CONST)
1271 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1273 /* See if this looks like
1274 (plus (mult (reg) (shadd_const))
1275 (const (plus (symbol_ref) (const_int))))
1277 Where const_int is small. In that case the const
1278 expression is a valid pointer for indexing.
1280 If const_int is big, but can be divided evenly by shadd_const
1281 and added to (reg). This allows more scaled indexed addresses. */
1282 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1283 && GET_CODE (XEXP (x, 0)) == MULT
1284 && GET_CODE (XEXP (y, 1)) == CONST_INT
1285 && INTVAL (XEXP (y, 1)) >= -4096
1286 && INTVAL (XEXP (y, 1)) <= 4095
1287 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1288 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1290 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1294 if (GET_CODE (reg1) != REG)
1295 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1297 reg2 = XEXP (XEXP (x, 0), 0);
1298 if (GET_CODE (reg2) != REG)
1299 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1301 return force_reg (Pmode,
1302 gen_rtx_PLUS (Pmode,
1303 gen_rtx_MULT (Pmode,
1308 else if ((mode == DFmode || mode == SFmode)
1309 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1310 && GET_CODE (XEXP (x, 0)) == MULT
1311 && GET_CODE (XEXP (y, 1)) == CONST_INT
1312 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1313 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1314 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1317 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1318 / INTVAL (XEXP (XEXP (x, 0), 1))));
1319 regx2 = XEXP (XEXP (x, 0), 0);
1320 if (GET_CODE (regx2) != REG)
1321 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1322 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1326 gen_rtx_PLUS (Pmode,
1327 gen_rtx_MULT (Pmode, regx2,
1328 XEXP (XEXP (x, 0), 1)),
1329 force_reg (Pmode, XEXP (y, 0))));
1331 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1332 && INTVAL (XEXP (y, 1)) >= -4096
1333 && INTVAL (XEXP (y, 1)) <= 4095)
1335 /* This is safe because of the guard page at the
1336 beginning and end of the data space. Just
1337 return the original address. */
1342 /* Doesn't look like one we can optimize. */
1343 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1344 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1345 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1346 regx1 = force_reg (Pmode,
1347 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1349 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1357 /* For the HPPA, REG and REG+CONST is cost 0
1358 and addresses involving symbolic constants are cost 2.
1360 PIC addresses are very expensive.
1362 It is no coincidence that this has the same structure
1363 as GO_IF_LEGITIMATE_ADDRESS. */
/* Target hook: return an estimated cost for address rtx X by dispatching
   on its RTL code.  NOTE(review): the return type line and all of the
   switch cases are elided from this view (the numeric prefixes are the
   original file's line numbers and show the gaps) — confirm the per-code
   costs against the full source.  */
1366 hppa_address_cost (rtx X)
1368 switch (GET_CODE (X))
1381 /* Compute a (partial) cost for rtx X. Return true if the complete
1382 cost has been computed, and false if subexpressions should be
1383 scanned. In either case, *TOTAL contains the cost result. */
/* Implements the RTX_COSTS target hook.  NOTE(review): the switch
   statement, its case labels and several branch bodies are elided from
   this view; the comments below on which case each cost group belongs
   to are inferences from the cost pattern — confirm against the full
   source.  */
1386 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
/* Small integer constants: zero and 14-bit values are effectively free
   (the assignments to *TOTAL for these branches are elided).  */
1391 if (INTVAL (x) == 0)
1393 else if (INT_14_BITS (x))
/* FP zero constants cost nothing unless they are the source of a SET
   (i.e. must actually be materialized).  */
1410 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1411 && outer_code != SET)
/* NOTE(review): elided case label — presumably MULT: 3 insns for FP,
   8 with a PA 1.1 FPU doing the integer multiply, 20 in soft-float.  */
1418 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1419 *total = COSTS_N_INSNS (3);
1420 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1421 *total = COSTS_N_INSNS (8);
1423 *total = COSTS_N_INSNS (20);
/* NOTE(review): elided case label — presumably DIV/MOD: 14 insns for a
   FP divide, 60 for the (millicode) integer division.  */
1427 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1429 *total = COSTS_N_INSNS (14);
1437 *total = COSTS_N_INSNS (60);
1440 case PLUS: /* this includes shNadd insns */
1442 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1443 *total = COSTS_N_INSNS (3);
1445 *total = COSTS_N_INSNS (1);
/* NOTE(review): elided case label(s) — presumably the shift codes,
   which are single-cycle on PA.  */
1451 *total = COSTS_N_INSNS (1);
1459 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1460 new rtx with the correct mode. */
1462 force_mode (enum machine_mode mode, rtx orig)
/* Fast path: ORIG already has the requested mode.  NOTE(review): the
   branch body is elided — presumably `return orig;`, confirm.  */
1464 if (mode == GET_MODE (orig))
/* Re-moding a REG in place is only valid for hard registers; the
   (elided) body presumably aborts when handed a pseudo — confirm.  */
1467 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
/* Build a fresh REG rtx for the same hard register in mode MODE.  */
1470 return gen_rtx_REG (mode, REGNO (orig));
1473 /* Emit insns to move operands[1] into operands[0].
1475 Return 1 if we have written out everything that needs to be done to
1476 do the move. Otherwise, return 0 and the caller will emit the move
1479 Note SCRATCH_REG may not be in the proper mode depending on how it
1480 will be used. This routine is responsible for creating a new copy
1481 of SCRATCH_REG in the proper mode. */
/* NOTE(review): many original lines are elided from this view (the
   numeric prefixes on each line are the original file's line numbers
   and show the gaps); the comments added below describe only the
   visible code and hedge anything the gaps hide.  */
1484 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1486 register rtx operand0 = operands[0];
1487 register rtx operand1 = operands[1];
1490 /* We can only handle indexed addresses in the destination operand
1491 of floating point stores. Thus, we need to break out indexed
1492 addresses from the destination operand. */
1493 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1495 /* This is only safe up to the beginning of life analysis. */
1499 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1500 operand0 = replace_equiv_address (operand0, tem);
1503 /* On targets with non-equivalent space registers, break out unscaled
1504 indexed addresses from the source operand before the final CSE.
1505 We have to do this because the REG_POINTER flag is not correctly
1506 carried through various optimization passes and CSE may substitute
1507 a pseudo without the pointer set for one with the pointer set. As
1508 a result, we loose various opportunities to create insns with
1509 unscaled indexed addresses. */
1510 if (!TARGET_NO_SPACE_REGS
1511 && !cse_not_expected
1512 && GET_CODE (operand1) == MEM
1513 && GET_CODE (XEXP (operand1, 0)) == PLUS
1514 && REG_P (XEXP (XEXP (operand1, 0), 0))
1515 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1517 = replace_equiv_address (operand1,
1518 copy_to_mode_reg (Pmode, XEXP (operand1, 0)))
/* During reload, substitute the stack-slot equivalent for a pseudo
   that did not get a hard register.  NOTE(review): the leading
   condition of this if (presumably a scratch_reg test) is elided.  */
1521 && reload_in_progress && GET_CODE (operand0) == REG
1522 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1523 operand0 = reg_equiv_mem[REGNO (operand0)];
1524 else if (scratch_reg
1525 && reload_in_progress && GET_CODE (operand0) == SUBREG
1526 && GET_CODE (SUBREG_REG (operand0)) == REG
1527 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1529 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1530 the code which tracks sets/uses for delete_output_reload. */
1531 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1532 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1533 SUBREG_BYTE (operand0));
1534 operand0 = alter_subreg (&temp);
/* Same pseudo/SUBREG substitution for the source operand.  */
1538 && reload_in_progress && GET_CODE (operand1) == REG
1539 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1540 operand1 = reg_equiv_mem[REGNO (operand1)];
1541 else if (scratch_reg
1542 && reload_in_progress && GET_CODE (operand1) == SUBREG
1543 && GET_CODE (SUBREG_REG (operand1)) == REG
1544 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1546 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1547 the code which tracks sets/uses for delete_output_reload. */
1548 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1549 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1550 SUBREG_BYTE (operand1));
1551 operand1 = alter_subreg (&temp);
/* Resolve any pending reload replacements inside MEM addresses so the
   addresses below reflect their final, post-reload form.  */
1554 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1555 && ((tem = find_replacement (&XEXP (operand0, 0)))
1556 != XEXP (operand0, 0)))
1557 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1559 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1560 && ((tem = find_replacement (&XEXP (operand1, 0)))
1561 != XEXP (operand1, 0)))
1562 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1564 /* Handle secondary reloads for loads/stores of FP registers from
1565 REG+D addresses where D does not fit in 5 bits, including
1566 (subreg (mem (addr))) cases. */
1568 && fp_reg_operand (operand0, mode)
1569 && ((GET_CODE (operand1) == MEM
1570 && !memory_address_p (DFmode, XEXP (operand1, 0)))
1571 || ((GET_CODE (operand1) == SUBREG
1572 && GET_CODE (XEXP (operand1, 0)) == MEM
1573 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))))
1575 if (GET_CODE (operand1) == SUBREG)
1576 operand1 = XEXP (operand1, 0);
1578 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1579 it in WORD_MODE regardless of what mode it was originally given
1581 scratch_reg = force_mode (word_mode, scratch_reg);
1583 /* D might not fit in 14 bits either; for such cases load D into
1585 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1587 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1588 emit_move_insn (scratch_reg,
1589 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1591 XEXP (XEXP (operand1, 0), 0),
1595 emit_move_insn (scratch_reg, XEXP (operand1, 0));
/* Load the FP destination through the address now held in the
   scratch register.  */
1596 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1597 gen_rtx_MEM (mode, scratch_reg)));
/* Symmetric case: secondary reload for STORES of FP registers to
   REG+D addresses where D does not fit.  */
1600 else if (scratch_reg
1601 && fp_reg_operand (operand1, mode)
1602 && ((GET_CODE (operand0) == MEM
1603 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1604 || ((GET_CODE (operand0) == SUBREG)
1605 && GET_CODE (XEXP (operand0, 0)) == MEM
1606 && !memory_address_p (DFmode,
1607 XEXP (XEXP (operand0, 0), 0)))))
1609 if (GET_CODE (operand0) == SUBREG)
1610 operand0 = XEXP (operand0, 0);
1612 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1613 it in WORD_MODE regardless of what mode it was originally given
1615 scratch_reg = force_mode (word_mode, scratch_reg);
1617 /* D might not fit in 14 bits either; for such cases load D into
1619 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1621 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1622 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1625 XEXP (XEXP (operand0, 0),
1630 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1631 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1635 /* Handle secondary reloads for loads of FP registers from constant
1636 expressions by forcing the constant into memory.
1638 Use scratch_reg to hold the address of the memory location.
1640 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1641 NO_REGS when presented with a const_int and a register class
1642 containing only FP registers. Doing so unfortunately creates
1643 more problems than it solves. Fix this for 2.5. */
1644 else if (scratch_reg
1645 && CONSTANT_P (operand1)
1646 && fp_reg_operand (operand0, mode))
1650 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1651 it in WORD_MODE regardless of what mode it was originally given
1653 scratch_reg = force_mode (word_mode, scratch_reg);
1655 /* Force the constant into memory and put the address of the
1656 memory location into scratch_reg. */
1657 xoperands[0] = scratch_reg;
1658 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
/* Recursive call moves the address itself into scratch_reg.  */
1659 emit_move_sequence (xoperands, Pmode, 0);
1661 /* Now load the destination register. */
1662 emit_insn (gen_rtx_SET (mode, operand0,
1663 gen_rtx_MEM (mode, scratch_reg)));
1666 /* Handle secondary reloads for SAR. These occur when trying to load
1667 the SAR from memory, FP register, or with a constant. */
1668 else if (scratch_reg
1669 && GET_CODE (operand0) == REG
1670 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1671 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1672 && (GET_CODE (operand1) == MEM
1673 || GET_CODE (operand1) == CONST_INT
1674 || (GET_CODE (operand1) == REG
1675 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1677 /* D might not fit in 14 bits either; for such cases load D into
1679 if (GET_CODE (operand1) == MEM
1680 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1682 /* We are reloading the address into the scratch register, so we
1683 want to make sure the scratch register is a full register. */
1684 scratch_reg = force_mode (word_mode, scratch_reg);
1686 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1687 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1690 XEXP (XEXP (operand1, 0),
1694 /* Now we are going to load the scratch register from memory,
1695 we want to load it in the same width as the original MEM,
1696 which must be the same as the width of the ultimate destination,
1698 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1700 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1705 /* We want to load the scratch register using the same mode as
1706 the ultimate destination. */
1707 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1709 emit_move_insn (scratch_reg, operand1);
1712 /* And emit the insn to set the ultimate destination. We know that
1713 the scratch register has the same mode as the destination at this
1715 emit_move_insn (operand0, scratch_reg);
1718 /* Handle the most common case: storing into a register. */
1719 else if (register_operand (operand0, mode))
1721 if (register_operand (operand1, mode)
1722 || (GET_CODE (operand1) == CONST_INT
1723 && cint_ok_for_move (INTVAL (operand1)))
1724 || (operand1 == CONST0_RTX (mode))
1725 || (GET_CODE (operand1) == HIGH
1726 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1727 /* Only `general_operands' can come here, so MEM is ok. */
1728 || GET_CODE (operand1) == MEM)
1730 /* Various sets are created during RTL generation which don't
1731 have the REG_POINTER flag correctly set. After the CSE pass,
1732 instruction recognition can fail if we don't consistently
1733 set this flag when performing register copies. This should
1734 also improve the opportunities for creating insns that use
1735 unscaled indexing. */
1736 if (REG_P (operand0) && REG_P (operand1))
1738 if (REG_POINTER (operand1)
1739 && !REG_POINTER (operand0)
1740 && !HARD_REGISTER_P (operand0))
1741 copy_reg_pointer (operand0, operand1);
1742 else if (REG_POINTER (operand0)
1743 && !REG_POINTER (operand1)
1744 && !HARD_REGISTER_P (operand1))
1745 copy_reg_pointer (operand1, operand0);
1748 /* When MEMs are broken out, the REG_POINTER flag doesn't
1749 get set. In some cases, we can set the REG_POINTER flag
1750 from the declaration for the MEM. */
1751 if (REG_P (operand0)
1752 && GET_CODE (operand1) == MEM
1753 && !REG_POINTER (operand0))
1755 tree decl = MEM_EXPR (operand1);
1757 /* Set the register pointer flag and register alignment
1758 if the declaration for this memory reference is a
1759 pointer type. Fortran indirect argument references
1762 && !(flag_argument_noalias > 1
1763 && TREE_CODE (decl) == INDIRECT_REF
1764 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1768 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1770 if (TREE_CODE (decl) == COMPONENT_REF)
1771 decl = TREE_OPERAND (decl, 1);
1773 type = TREE_TYPE (decl);
1774 if (TREE_CODE (type) == ARRAY_TYPE)
1775 type = get_inner_array_type (type);
1777 if (POINTER_TYPE_P (type))
1781 type = TREE_TYPE (type);
1782 /* Using TYPE_ALIGN_OK is rather conservative as
1783 only the ada frontend actually sets it. */
1784 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1786 mark_reg_pointer (operand0, align);
1791 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1795 else if (GET_CODE (operand0) == MEM)
/* Storing FP zero: materialize it in a temporary register first so
   the store itself is a plain register store.  */
1797 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1798 && !(reload_in_progress || reload_completed)
1800 rtx temp = gen_reg_rtx (DFmode);
1802 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1803 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1806 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1808 /* Run this case quickly. */
1809 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
/* Otherwise force the source into a register and let the caller
   emit the actual store (return 0 path).  */
1812 if (! (reload_in_progress || reload_completed))
1814 operands[0] = validize_mem (operand0);
1815 operands[1] = operand1 = force_reg (mode, operand1);
1819 /* Simplify the source if we need to.
1820 Note we do have to handle function labels here, even though we do
1821 not consider them legitimate constants. Loop optimizations can
1822 call the emit_move_xxx with one as a source. */
1823 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1824 || function_label_operand (operand1, mode)
1825 || (GET_CODE (operand1) == HIGH
1826 && symbolic_operand (XEXP (operand1, 0), mode)))
1830 if (GET_CODE (operand1) == HIGH)
1833 operand1 = XEXP (operand1, 0);
1835 if (symbolic_operand (operand1, mode))
1837 /* Argh. The assembler and linker can't handle arithmetic
1840 So we force the plabel into memory, load operand0 from
1841 the memory location, then add in the constant part. */
1842 if ((GET_CODE (operand1) == CONST
1843 && GET_CODE (XEXP (operand1, 0)) == PLUS
1844 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1845 || function_label_operand (operand1, mode))
1847 rtx temp, const_part;
1849 /* Figure out what (if any) scratch register to use. */
1850 if (reload_in_progress || reload_completed)
1852 scratch_reg = scratch_reg ? scratch_reg : operand0;
1853 /* SCRATCH_REG will hold an address and maybe the actual
1854 data. We want it in WORD_MODE regardless of what mode it
1855 was originally given to us. */
1856 scratch_reg = force_mode (word_mode, scratch_reg);
1859 scratch_reg = gen_reg_rtx (Pmode);
1861 if (GET_CODE (operand1) == CONST)
1863 /* Save away the constant part of the expression. */
1864 const_part = XEXP (XEXP (operand1, 0), 1);
1865 if (GET_CODE (const_part) != CONST_INT)
1868 /* Force the function label into memory. */
1869 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1873 /* No constant part. */
1874 const_part = NULL_RTX;
1876 /* Force the function label into memory. */
1877 temp = force_const_mem (mode, operand1);
1881 /* Get the address of the memory location. PIC-ify it if
1883 temp = XEXP (temp, 0);
1885 temp = legitimize_pic_address (temp, mode, scratch_reg);
1887 /* Put the address of the memory location into our destination
1890 emit_move_sequence (operands, mode, scratch_reg);
1892 /* Now load from the memory location into our destination
1894 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1895 emit_move_sequence (operands, mode, scratch_reg);
1897 /* And add back in the constant part. */
1898 if (const_part != NULL_RTX)
1899 expand_inc (operand0, const_part);
/* PIC symbolic source (the guarding condition is elided here).  */
1908 if (reload_in_progress || reload_completed)
1910 temp = scratch_reg ? scratch_reg : operand0;
1911 /* TEMP will hold an address and maybe the actual
1912 data. We want it in WORD_MODE regardless of what mode it
1913 was originally given to us. */
1914 temp = force_mode (word_mode, temp);
1917 temp = gen_reg_rtx (Pmode);
1919 /* (const (plus (symbol) (const_int))) must be forced to
1920 memory during/after reload if the const_int will not fit
1922 if (GET_CODE (operand1) == CONST
1923 && GET_CODE (XEXP (operand1, 0)) == PLUS
1924 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1925 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1926 && (reload_completed || reload_in_progress)
1929 operands[1] = force_const_mem (mode, operand1);
1930 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1932 operands[1] = gen_rtx_MEM (mode, operands[1]);
1933 emit_move_sequence (operands, mode, temp);
1937 operands[1] = legitimize_pic_address (operand1, mode, temp);
1938 if (REG_P (operand0) && REG_P (operands[1]))
1939 copy_reg_pointer (operand0, operands[1]);
1940 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1943 /* On the HPPA, references to data space are supposed to use dp,
1944 register 27, but showing it in the RTL inhibits various cse
1945 and loop optimizations. */
1950 if (reload_in_progress || reload_completed)
1952 temp = scratch_reg ? scratch_reg : operand0;
1953 /* TEMP will hold an address and maybe the actual
1954 data. We want it in WORD_MODE regardless of what mode it
1955 was originally given to us. */
1956 temp = force_mode (word_mode, temp);
1959 temp = gen_reg_rtx (mode);
1961 /* Loading a SYMBOL_REF into a register makes that register
1962 safe to be used as the base in an indexed address.
1964 Don't mark hard registers though. That loses. */
1965 if (GET_CODE (operand0) == REG
1966 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1967 mark_reg_pointer (operand0, BITS_PER_UNIT);
1968 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1969 mark_reg_pointer (temp, BITS_PER_UNIT);
/* Build the HIGH/LO_SUM pair for the symbolic address.  */
1972 set = gen_rtx_SET (mode, operand0, temp);
1974 set = gen_rtx_SET (VOIDmode,
1976 gen_rtx_LO_SUM (mode, temp, operand1));
1978 emit_insn (gen_rtx_SET (VOIDmode,
1980 gen_rtx_HIGH (mode, operand1)));
/* Source is a constant that cannot be moved with a single insn:
   split into high/low parts (and, on 64-bit, possibly an insv of
   the upper 32 bits).  */
1986 else if (GET_CODE (operand1) != CONST_INT
1987 || !cint_ok_for_move (INTVAL (operand1)))
1991 HOST_WIDE_INT value = 0;
1992 HOST_WIDE_INT insv = 0;
1995 if (GET_CODE (operand1) == CONST_INT)
1996 value = INTVAL (operand1);
1999 && GET_CODE (operand1) == CONST_INT
2000 && HOST_BITS_PER_WIDE_INT > 32
2001 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2005 /* Extract the low order 32 bits of the value and sign extend.
2006 If the new value is the same as the original value, we can
2007 can use the original value as-is. If the new value is
2008 different, we use it and insert the most-significant 32-bits
2009 of the original value into the final result. */
2010 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2011 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2014 #if HOST_BITS_PER_WIDE_INT > 32
2015 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2019 operand1 = GEN_INT (nval);
2023 if (reload_in_progress || reload_completed)
2024 temp = scratch_reg ? scratch_reg : operand0;
2026 temp = gen_reg_rtx (mode);
2028 /* We don't directly split DImode constants on 32-bit targets
2029 because PLUS uses an 11-bit immediate and the insn sequence
2030 generated is not as efficient as the one using HIGH/LO_SUM. */
2031 if (GET_CODE (operand1) == CONST_INT
2032 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2035 /* Directly break constant into high and low parts. This
2036 provides better optimization opportunities because various
2037 passes recognize constants split with PLUS but not LO_SUM.
2038 We use a 14-bit signed low part except when the addition
2039 of 0x4000 to the high part might change the sign of the
2041 HOST_WIDE_INT low = value & 0x3fff;
2042 HOST_WIDE_INT high = value & ~ 0x3fff;
2046 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2054 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2055 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2059 emit_insn (gen_rtx_SET (VOIDmode, temp,
2060 gen_rtx_HIGH (mode, operand1)));
2061 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2064 insn = emit_move_insn (operands[0], operands[1]);
2066 /* Now insert the most significant 32 bits of the value
2067 into the register. When we don't have a second register
2068 available, it could take up to nine instructions to load
2069 a 64-bit integer constant. Prior to reload, we force
2070 constants that would take more than three instructions
2071 to load to the constant pool. During and after reload,
2072 we have to handle all possible values. */
2075 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2076 register and the value to be inserted is outside the
2077 range that can be loaded with three depdi instructions. */
2078 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2080 operand1 = GEN_INT (insv);
2082 emit_insn (gen_rtx_SET (VOIDmode, temp,
2083 gen_rtx_HIGH (mode, operand1)));
2084 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2085 emit_insn (gen_insv (operand0, GEN_INT (32),
2090 int len = 5, pos = 27;
2092 /* Insert the bits using the depdi instruction. */
2095 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2096 HOST_WIDE_INT sign = v5 < 0;
2098 /* Left extend the insertion. */
2099 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2100 while (pos > 0 && (insv & 1) == sign)
2102 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2107 emit_insn (gen_insv (operand0, GEN_INT (len),
2108 GEN_INT (pos), GEN_INT (v5)));
2110 len = pos > 0 && pos < 5 ? pos : 5;
/* Record the original constant as a REG_EQUAL note so later
   passes still know the register's value.  */
2117 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
2122 /* Now have insn-emit do whatever it normally does. */
2126 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2127 it will need a link/runtime reloc). */
/* Recursive walk over a tree constant.  NOTE(review): the return type,
   most case labels and the ADDR_EXPR base case are elided from this
   view.  */
2130 reloc_needed (tree exp)
2134 switch (TREE_CODE (exp))
/* Binary nodes (elided labels, e.g. PLUS_EXPR/MINUS_EXPR — confirm):
   a reloc in either operand forces a reloc for the whole node.  */
2141 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2142 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
/* Unary wrappers just look through to their operand.  */
2147 case NON_LVALUE_EXPR:
2148 reloc = reloc_needed (TREE_OPERAND (exp, 0));
/* CONSTRUCTOR: accumulate over every element value.  */
2154 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
2155 if (TREE_VALUE (link) != 0)
2156 reloc |= reloc_needed (TREE_VALUE (link));
2169 /* Does operand (which is a symbolic_operand) live in text space?
2170 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2174 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
/* Look through a CONST wrapper to reach the underlying SYMBOL_REF.  */
2176 if (GET_CODE (operand) == CONST)
2177 operand = XEXP (XEXP (operand, 0), 0);
/* NOTE(review): the guard separating these two SYMBOL_REF tests is
   elided — presumably a flag_pic check, since under PIC a constant-pool
   entry is NOT read-only text while otherwise it is; confirm against
   the full source.  */
2180 if (GET_CODE (operand) == SYMBOL_REF)
2181 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2185 if (GET_CODE (operand) == SYMBOL_REF)
2186 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2192 /* Return the best assembler insn template
2193 for moving operands[1] into operands[0] as a fullword. */
2195 singlemove_string (rtx *operands)
2197 HOST_WIDE_INT intval;
/* Store to memory: stw covers every register source.  */
2199 if (GET_CODE (operands[0]) == MEM)
2200 return "stw %r1,%0";
/* Load from memory (the returned template line is elided here).  */
2201 if (GET_CODE (operands[1]) == MEM)
2203 if (GET_CODE (operands[1]) == CONST_DOUBLE)
/* Only SFmode CONST_DOUBLEs are expected here; other modes are
   rejected (the abort/error line is elided).  */
2208 if (GET_MODE (operands[1]) != SFmode)
2211 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2213 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2214 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2216 operands[1] = GEN_INT (i);
2217 /* Fall through to CONST_INT case. */
2219 if (GET_CODE (operands[1]) == CONST_INT)
2221 intval = INTVAL (operands[1]);
/* Pick the cheapest immediate-load form: ldi/ldo for 14-bit values
   (template elided), ldil for values with a zero low part, zdepi for
   values expressible as a deposited bit string, else the two-insn
   ldil + ldo sequence.  */
2223 if (VAL_14_BITS_P (intval))
2225 else if ((intval & 0x7ff) == 0)
2226 return "ldil L'%1,%0";
2227 else if (zdepi_cint_p (intval))
2228 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2230 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
/* Plain register-to-register copy.  */
2232 return "copy %1,%0";
2236 /* Compute position (in OP[1]) and width (in OP[2])
2237 useful for copying IMM to a register using the zdepi
2238 instructions. Store the immediate value to insert in OP[0]. */
/* 32-bit variant.  NOTE(review): the loop bodies, the final stores
   into op[0..2] and the local declarations are elided from this
   view.  */
2240 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2244 /* Find the least significant set bit in IMM. */
2245 for (lsb = 0; lsb < 32; lsb++)
2252 /* Choose variants based on *sign* of the 5-bit field. */
2253 if ((imm & 0x10) == 0)
2254 len = (lsb <= 28) ? 4 : 32 - lsb;
2257 /* Find the width of the bitstring in IMM. */
2258 for (len = 5; len < 32; len++)
2260 if ((imm & (1 << len)) == 0)
2264 /* Sign extend IMM as a 5-bit value. */
2265 imm = (imm & 0xf) - 0x10;
2273 /* Compute position (in OP[1]) and width (in OP[2])
2274 useful for copying IMM to a register using the depdi,z
2275 instructions. Store the immediate value to insert in OP[0]. */
/* 64-bit counterpart of compute_zdepwi_operands, parameterized on
   HOST_BITS_PER_WIDE_INT.  NOTE(review): loop bodies and the final
   stores into op[0..2] are elided from this view.  */
2277 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2279 HOST_WIDE_INT lsb, len;
2281 /* Find the least significant set bit in IMM. */
2282 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2289 /* Choose variants based on *sign* of the 5-bit field. */
2290 if ((imm & 0x10) == 0)
2291 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2292 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2295 /* Find the width of the bitstring in IMM. */
2296 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2298 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2302 /* Sign extend IMM as a 5-bit value. */
2303 imm = (imm & 0xf) - 0x10;
2311 /* Output assembler code to perform a doubleword move insn
2312 with operands OPERANDS. */
/* Returns an assembler template string for the final (or only) insn of
   the move; for the auto-inc/dec, scaled-index, and split-word cases it
   first emits part of the sequence itself via output_asm_insn.  */
2315 output_move_double (rtx *operands)
2317 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2319 rtx addreg0 = 0, addreg1 = 0;
2321 /* First classify both operands. */
/* optype0/optype1: REGOP = register, OFFSOP = offsettable memory,
   MEMOP = other memory, CNSTOP = constant.  */
2323 if (REG_P (operands[0]))
2325 else if (offsettable_memref_p (operands[0]))
2327 else if (GET_CODE (operands[0]) == MEM)
2332 if (REG_P (operands[1]))
2334 else if (CONSTANT_P (operands[1]))
2336 else if (offsettable_memref_p (operands[1]))
2338 else if (GET_CODE (operands[1]) == MEM)
2343 /* Check for the cases that the operand constraints are not
2344 supposed to allow to happen. Abort if we get one,
2345 because generating code for these cases is painful. */
2347 if (optype0 != REGOP && optype1 != REGOP)
2350 /* Handle auto decrementing and incrementing loads and stores
2351 specifically, since the structure of the function doesn't work
2352 for them without major modification. Do it better when we learn
2353 this port about the general inc/dec addressing of PA.
2354 (This was written by tege. Chide him if it doesn't work.) */
2356 if (optype0 == MEMOP)
2358 /* We have to output the address syntax ourselves, since print_operand
2359 doesn't deal with the addresses we want to use. Fix this later. */
2361 rtx addr = XEXP (operands[0], 0);
2362 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
/* high_reg is the first (most significant on PA) word of the source pair;
   %R1 in the templates names the second word of the pair.  */
2364 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2366 operands[0] = XEXP (addr, 0);
2367 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2370 if (!reg_overlap_mentioned_p (high_reg, addr))
2372 /* No overlap between high target register and address
2373 register. (We do this in a non-obvious way to
2374 save a register file writeback) */
2375 if (GET_CODE (addr) == POST_INC)
2376 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2377 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2382 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2384 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2386 operands[0] = XEXP (addr, 0);
2387 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2390 if (!reg_overlap_mentioned_p (high_reg, addr))
2392 /* No overlap between high target register and address
2393 register. (We do this in a non-obvious way to
2394 save a register file writeback) */
2395 if (GET_CODE (addr) == PRE_INC)
2396 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2397 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2403 if (optype1 == MEMOP)
2405 /* We have to output the address syntax ourselves, since print_operand
2406 doesn't deal with the addresses we want to use. Fix this later. */
2408 rtx addr = XEXP (operands[1], 0);
2409 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2411 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2413 operands[1] = XEXP (addr, 0);
2414 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2417 if (!reg_overlap_mentioned_p (high_reg, addr))
2419 /* No overlap between high target register and address
2420 register. (We do this in a non-obvious way to
2421 save a register file writeback) */
2422 if (GET_CODE (addr) == POST_INC)
2423 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2424 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2428 /* This is an undefined situation. We should load into the
2429 address register *and* update that register. Probably
2430 we don't need to handle this at all. */
2431 if (GET_CODE (addr) == POST_INC)
2432 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2433 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2436 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2438 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2440 operands[1] = XEXP (addr, 0);
2441 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2444 if (!reg_overlap_mentioned_p (high_reg, addr))
2446 /* No overlap between high target register and address
2447 register. (We do this in a non-obvious way to
2448 save a register file writeback) */
2449 if (GET_CODE (addr) == PRE_INC)
2450 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2451 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2455 /* This is an undefined situation. We should load into the
2456 address register *and* update that register. Probably
2457 we don't need to handle this at all. */
2458 if (GET_CODE (addr) == PRE_INC)
2459 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2460 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2463 else if (GET_CODE (addr) == PLUS
2464 && GET_CODE (XEXP (addr, 0)) == MULT)
/* Scaled-index address: compute the effective address into one half of
   the destination pair with shNaddl/shladd, then load through it.  */
2466 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2468 if (!reg_overlap_mentioned_p (high_reg, addr))
2472 xoperands[0] = high_reg;
2473 xoperands[1] = XEXP (addr, 1);
2474 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2475 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2476 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2478 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2484 xoperands[0] = high_reg;
2485 xoperands[1] = XEXP (addr, 1);
2486 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2487 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2488 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2490 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2495 /* If an operand is an unoffsettable memory ref, find a register
2496 we can increment temporarily to make it refer to the second word. */
2498 if (optype0 == MEMOP)
2499 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2501 if (optype1 == MEMOP)
2502 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2504 /* Ok, we can do one word at a time.
2505 Normally we do the low-numbered word first.
2507 In either case, set up in LATEHALF the operands to use
2508 for the high-numbered word and in some cases alter the
2509 operands in OPERANDS to be suitable for the low-numbered word. */
2511 if (optype0 == REGOP)
2512 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2513 else if (optype0 == OFFSOP)
2514 latehalf[0] = adjust_address (operands[0], SImode, 4);
2516 latehalf[0] = operands[0];
2518 if (optype1 == REGOP)
2519 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2520 else if (optype1 == OFFSOP)
2521 latehalf[1] = adjust_address (operands[1], SImode, 4);
2522 else if (optype1 == CNSTOP)
2523 split_double (operands[1], &operands[1], &latehalf[1]);
2525 latehalf[1] = operands[1];
2527 /* If the first move would clobber the source of the second one,
2528 do them in the other order.
2530 This can happen in two cases:
2532 mem -> register where the first half of the destination register
2533 is the same register used in the memory's address. Reload
2534 can create such insns.
2536 mem in this case will be either register indirect or register
2537 indirect plus a valid offset.
2539 register -> register move where REGNO(dst) == REGNO(src + 1)
2540 someone (Tim/Tege?) claimed this can happen for parameter loads.
2542 Handle mem -> register case first. */
2543 if (optype0 == REGOP
2544 && (optype1 == MEMOP || optype1 == OFFSOP)
2545 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2548 /* Do the late half first. */
2550 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2551 output_asm_insn (singlemove_string (latehalf), latehalf);
2555 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2556 return singlemove_string (operands);
2559 /* Now handle register -> register case. */
2560 if (optype0 == REGOP && optype1 == REGOP
2561 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2563 output_asm_insn (singlemove_string (latehalf), latehalf);
2564 return singlemove_string (operands);
2567 /* Normal case: do the two words, low-numbered first. */
2569 output_asm_insn (singlemove_string (operands), operands);
2571 /* Make any unoffsettable addresses point at high-numbered word. */
2573 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2575 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2578 output_asm_insn (singlemove_string (latehalf), latehalf);
2580 /* Undo the adds we just did. */
2582 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2584 output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Output assembler code for a doubleword floating-point move with
   operands OPERANDS: FP-to-FP copy (fcpy,dbl), FP load (fldd), FP
   store (fstd), or clearing a general-register pair to zero.  */
2590 output_fp_move_double (rtx *operands)
2592 if (FP_REG_P (operands[0]))
2594 if (FP_REG_P (operands[1])
2595 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2596 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2598 output_asm_insn ("fldd%F1 %1,%0", operands);
2600 else if (FP_REG_P (operands[1]))
2602 output_asm_insn ("fstd%F0 %1,%0", operands);
2604 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2606 if (GET_CODE (operands[0]) == REG)
/* Zero both halves of the general-register pair with copies from %r0.  */
2609 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2610 xoperands[0] = operands[0];
2611 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2613 /* This is a pain. You have to be prepared to deal with an
2614 arbitrary address here including pre/post increment/decrement.
2616 so avoid this in the MD. */
2624 /* Return a REG that occurs in ADDR with coefficient 1.
2625 ADDR can be effectively incremented by incrementing REG. */
/* Walks down PLUS expressions, stepping past REG or CONSTANT operands,
   until the remaining subexpression is the register itself.  */
2628 find_addr_reg (rtx addr)
2630 while (GET_CODE (addr) == PLUS)
2632 if (GET_CODE (XEXP (addr, 0)) == REG)
2633 addr = XEXP (addr, 0);
2634 else if (GET_CODE (XEXP (addr, 1)) == REG)
2635 addr = XEXP (addr, 1);
2636 else if (CONSTANT_P (XEXP (addr, 0)))
2637 addr = XEXP (addr, 1);
2638 else if (CONSTANT_P (XEXP (addr, 1)))
2639 addr = XEXP (addr, 0);
2643 if (GET_CODE (addr) == REG)
2648 /* Emit code to perform a block move.
2650 OPERANDS[0] is the destination pointer as a REG, clobbered.
2651 OPERANDS[1] is the source pointer as a REG, clobbered.
2652 OPERANDS[2] is a register for temporary storage.
2653 OPERANDS[3] is a register for temporary storage.
2654 OPERANDS[4] is the size as a CONST_INT
2655 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2656 OPERANDS[6] is another temporary register. */
/* Emits a copy loop at the widest safe unit (8/4/2/1 bytes per the
   alignment), followed by residual-byte handling.  */
2659 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2661 int align = INTVAL (operands[5]);
2662 unsigned long n_bytes = INTVAL (operands[4]);
2664 /* We can't move more than a word at a time because the PA
2665 has no longer integer move insns. (Could use fp mem ops?) */
2666 if (align > (TARGET_64BIT ? 8 : 4))
2667 align = (TARGET_64BIT ? 8 : 4);
2669 /* Note that we know each loop below will execute at least twice
2670 (else we would have open-coded the copy). */
/* Doubleword (8-byte) copy loop, 64-bit target only.  */
2674 /* Pre-adjust the loop counter. */
2675 operands[4] = GEN_INT (n_bytes - 16);
2676 output_asm_insn ("ldi %4,%2", operands);
/* The addib,>= branches back .-12 (three insns) to form the loop.  */
2679 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2680 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2681 output_asm_insn ("std,ma %3,8(%0)", operands);
2682 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2683 output_asm_insn ("std,ma %6,8(%0)", operands);
2685 /* Handle the residual. There could be up to 7 bytes of
2686 residual to copy! */
2687 if (n_bytes % 16 != 0)
2689 operands[4] = GEN_INT (n_bytes % 8);
2690 if (n_bytes % 16 >= 8)
2691 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2692 if (n_bytes % 8 != 0)
2693 output_asm_insn ("ldd 0(%1),%6", operands);
2694 if (n_bytes % 16 >= 8)
2695 output_asm_insn ("std,ma %3,8(%0)", operands);
2696 if (n_bytes % 8 != 0)
2697 output_asm_insn ("stdby,e %6,%4(%0)", operands);
/* Word (4-byte) copy loop.  */
2702 /* Pre-adjust the loop counter. */
2703 operands[4] = GEN_INT (n_bytes - 8);
2704 output_asm_insn ("ldi %4,%2", operands);
2707 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2708 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2709 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2710 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2711 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2713 /* Handle the residual. There could be up to 7 bytes of
2714 residual to copy! */
2715 if (n_bytes % 8 != 0)
2717 operands[4] = GEN_INT (n_bytes % 4);
2718 if (n_bytes % 8 >= 4)
2719 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2720 if (n_bytes % 4 != 0)
2721 output_asm_insn ("ldw 0(%1),%6", operands);
2722 if (n_bytes % 8 >= 4)
2723 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2724 if (n_bytes % 4 != 0)
2725 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
/* Halfword (2-byte) copy loop.  */
2730 /* Pre-adjust the loop counter. */
2731 operands[4] = GEN_INT (n_bytes - 4);
2732 output_asm_insn ("ldi %4,%2", operands);
2735 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2736 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2737 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2738 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2739 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2741 /* Handle the residual. */
2742 if (n_bytes % 4 != 0)
2744 if (n_bytes % 4 >= 2)
2745 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2746 if (n_bytes % 2 != 0)
2747 output_asm_insn ("ldb 0(%1),%6", operands);
2748 if (n_bytes % 4 >= 2)
2749 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2750 if (n_bytes % 2 != 0)
2751 output_asm_insn ("stb %6,0(%0)", operands);
/* Byte copy loop.  */
2756 /* Pre-adjust the loop counter. */
2757 operands[4] = GEN_INT (n_bytes - 2);
2758 output_asm_insn ("ldi %4,%2", operands);
2761 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2762 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2763 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2764 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2765 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2767 /* Handle the residual. */
2768 if (n_bytes % 2 != 0)
2770 output_asm_insn ("ldb 0(%1),%3", operands);
2771 output_asm_insn ("stb %3,0(%0)", operands);
2780 /* Count the number of insns necessary to handle this block move.
2782 Basic structure is the same as emit_block_move, except that we
2783 count insns rather than emit them. */
/* Returns the length in bytes (insn count * 4); keep in sync with
   output_block_move above.  */
2786 compute_movstr_length (rtx insn)
2788 rtx pat = PATTERN (insn);
/* Alignment and byte count are pulled from the movstr pattern's
   operand vector (elements 7 and 6 respectively).  */
2789 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2790 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2791 unsigned int n_insns = 0;
2793 /* We can't move more than four bytes at a time because the PA
2794 has no longer integer move insns. (Could use fp mem ops?) */
2795 if (align > (TARGET_64BIT ? 8 : 4))
2796 align = (TARGET_64BIT ? 8 : 4);
2798 /* The basic copying loop. */
2802 if (n_bytes % (2 * align) != 0)
2804 if ((n_bytes % (2 * align)) >= align)
2807 if ((n_bytes % align) != 0)
2811 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2815 /* Emit code to perform a block clear.
2817 OPERANDS[0] is the destination pointer as a REG, clobbered.
2818 OPERANDS[1] is a register for temporary storage.
2819 OPERANDS[2] is the size as a CONST_INT
2820 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
/* Stores %r0 (zero) in a loop at the widest safe unit, then handles
   the residual bytes.  Structure parallels output_block_move.  */
2823 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2825 int align = INTVAL (operands[3]);
2826 unsigned long n_bytes = INTVAL (operands[2]);
2828 /* We can't clear more than a word at a time because the PA
2829 has no longer integer move insns. */
2830 if (align > (TARGET_64BIT ? 8 : 4))
2831 align = (TARGET_64BIT ? 8 : 4);
2833 /* Note that we know each loop below will execute at least twice
2834 (else we would have open-coded the copy). */
/* Doubleword clear loop, 64-bit target only.  */
2838 /* Pre-adjust the loop counter. */
2839 operands[2] = GEN_INT (n_bytes - 16);
2840 output_asm_insn ("ldi %2,%1", operands);
/* The addib,>= branches back .-4 (one insn) to form the loop.  */
2843 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2844 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2845 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2847 /* Handle the residual. There could be up to 7 bytes of
2848 residual to copy! */
2849 if (n_bytes % 16 != 0)
2851 operands[2] = GEN_INT (n_bytes % 8);
2852 if (n_bytes % 16 >= 8)
2853 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2854 if (n_bytes % 8 != 0)
2855 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
/* Word clear loop.  */
2860 /* Pre-adjust the loop counter. */
2861 operands[2] = GEN_INT (n_bytes - 8);
2862 output_asm_insn ("ldi %2,%1", operands);
2865 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2866 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2867 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2869 /* Handle the residual. There could be up to 7 bytes of
2870 residual to copy! */
2871 if (n_bytes % 8 != 0)
2873 operands[2] = GEN_INT (n_bytes % 4);
2874 if (n_bytes % 8 >= 4)
2875 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2876 if (n_bytes % 4 != 0)
2877 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
/* Halfword clear loop.  */
2882 /* Pre-adjust the loop counter. */
2883 operands[2] = GEN_INT (n_bytes - 4);
2884 output_asm_insn ("ldi %2,%1", operands);
2887 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2888 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2889 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2891 /* Handle the residual. */
2892 if (n_bytes % 4 != 0)
2894 if (n_bytes % 4 >= 2)
2895 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2896 if (n_bytes % 2 != 0)
2897 output_asm_insn ("stb %%r0,0(%0)", operands);
/* Byte clear loop.  */
2902 /* Pre-adjust the loop counter. */
2903 operands[2] = GEN_INT (n_bytes - 2);
2904 output_asm_insn ("ldi %2,%1", operands);
2907 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2908 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2909 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2911 /* Handle the residual. */
2912 if (n_bytes % 2 != 0)
2913 output_asm_insn ("stb %%r0,0(%0)", operands);
2922 /* Count the number of insns necessary to handle this block clear.
2924 Basic structure is the same as output_block_clear, except that we
2925 count insns rather than emit them. */
/* Returns the length in bytes (insn count * 4); keep in sync with
   output_block_clear above.  */
2928 compute_clrstr_length (rtx insn)
2930 rtx pat = PATTERN (insn);
/* Alignment and byte count are pulled from the clrstr pattern's
   operand vector (elements 4 and 3 respectively).  */
2931 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2932 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2933 unsigned int n_insns = 0;
2935 /* We can't clear more than a word at a time because the PA
2936 has no longer integer move insns. */
2937 if (align > (TARGET_64BIT ? 8 : 4))
2938 align = (TARGET_64BIT ? 8 : 4);
2940 /* The basic loop. */
2944 if (n_bytes % (2 * align) != 0)
2946 if ((n_bytes % (2 * align)) >= align)
2949 if ((n_bytes % align) != 0)
2953 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
/* Return a string to perform a 32-bit bitwise-and of operands[1] with
   operands[2], storing the result in operands[0].  When the constant
   mask is a contiguous bit field, use extru or depi instead of and.  */
2959 output_and (rtx *operands)
2961 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2963 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2964 int ls0, ls1, ms0, p, len;
/* ls0 = lowest clear bit, ls1 = next set bit above it,
   ms0 = next clear bit above that.  */
2966 for (ls0 = 0; ls0 < 32; ls0++)
2967 if ((mask & (1 << ls0)) == 0)
2970 for (ls1 = ls0; ls1 < 32; ls1++)
2971 if ((mask & (1 << ls1)) != 0)
2974 for (ms0 = ls1; ms0 < 32; ms0++)
2975 if ((mask & (1 << ms0)) == 0)
2988 operands[2] = GEN_INT (len);
2989 return "{extru|extrw,u} %1,31,%2,%0";
2993 /* We could use this `depi' for the case above as well, but `depi'
2994 requires one more register file access than an `extru'. */
2999 operands[2] = GEN_INT (p);
3000 operands[3] = GEN_INT (len);
3001 return "{depi|depwi} 0,%2,%3,%0";
3005 return "and %1,%2,%0";
3008 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3009 storing the result in operands[0]. */
/* 64-bit variant of output_and: uses extrd,u / depdi for contiguous
   bit-field masks, plain and otherwise.  */
3011 output_64bit_and (rtx *operands)
3013 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3015 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3016 int ls0, ls1, ms0, p, len;
/* ls0 = lowest clear bit, ls1 = next set bit above it,
   ms0 = next clear bit above that.  */
3018 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3019 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3022 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3023 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3026 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3027 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3030 if (ms0 != HOST_BITS_PER_WIDE_INT)
3033 if (ls1 == HOST_BITS_PER_WIDE_INT)
3040 operands[2] = GEN_INT (len);
3041 return "extrd,u %1,63,%2,%0";
3045 /* We could use this `depi' for the case above as well, but `depi'
3046 requires one more register file access than an `extru'. */
3051 operands[2] = GEN_INT (p);
3052 operands[3] = GEN_INT (len);
3053 return "depdi 0,%2,%3,%0";
3057 return "and %1,%2,%0";
/* Return a string to perform a 32-bit bitwise-or of operands[1] with the
   constant operands[2], storing the result in operands[0].  A contiguous
   run of set bits is deposited with depi -1.  */
3061 output_ior (rtx *operands)
3063 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3064 int bs0, bs1, p, len;
3066 if (INTVAL (operands[2]) == 0)
3067 return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = next clear bit above it.  */
3069 for (bs0 = 0; bs0 < 32; bs0++)
3070 if ((mask & (1 << bs0)) != 0)
3073 for (bs1 = bs0; bs1 < 32; bs1++)
3074 if ((mask & (1 << bs1)) == 0)
3077 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3083 operands[2] = GEN_INT (p);
3084 operands[3] = GEN_INT (len);
3085 return "{depi|depwi} -1,%2,%3,%0";
3088 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3089 storing the result in operands[0]. */
/* 64-bit variant of output_ior: deposits a contiguous run of set bits
   with depdi -1; operand 2 must be a CONST_INT.  */
3091 output_64bit_ior (rtx *operands)
3093 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3094 int bs0, bs1, p, len;
3096 if (INTVAL (operands[2]) == 0)
3097 return "copy %1,%0";
/* bs0 = lowest set bit, bs1 = next clear bit above it.  */
3099 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3100 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3103 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3104 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3107 if (bs1 != HOST_BITS_PER_WIDE_INT
3108 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3114 operands[2] = GEN_INT (p);
3115 operands[3] = GEN_INT (len);
3116 return "depdi -1,%2,%3,%0";
3119 /* Target hook for assembling integer objects. This code handles
3120 aligned SI and DI integers specially, since function references must
3121 be preceded by P%. */
/* Falls back to default_assemble_integer for everything else.  */
3124 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3126 if (size == UNITS_PER_WORD && aligned_p
3127 && function_label_operand (x, VOIDmode))
/* Emit the P% plabel prefix before the function address.  */
3129 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3130 output_addr_const (asm_out_file, x);
3131 fputc ('\n', asm_out_file);
3134 return default_assemble_integer (x, size, aligned_p);
3137 /* Output an ascii string. */
/* Emits P[0..SIZE) to FILE as one or more .STRING directives, escaping
   quote, backslash, and non-printing characters as \xNN, and splitting
   the output to respect the HP assembler's input-line length limit.  */
3139 output_ascii (FILE *file, const char *p, int size)
3143 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3145 /* The HP assembler can only take strings of 256 characters at one
3146 time. This is a limitation on input line length, *not* the
3147 length of the string. Sigh. Even worse, it seems that the
3148 restriction is in number of input characters (see \xnn &
3149 \whatever). So we have to do this very carefully. */
3151 fputs ("\t.STRING \"", file);
3154 for (i = 0; i < size; i += 4)
/* Process up to 4 input chars per iteration; each may expand to 4
   output chars, hence the 16-byte partial_output buffer.  */
3158 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3160 register unsigned int c = (unsigned char) p[i + io];
3162 if (c == '\"' || c == '\\')
3163 partial_output[co++] = '\\';
3164 if (c >= ' ' && c < 0177)
3165 partial_output[co++] = c;
/* Non-printing: emit as \xNN hex escape.  */
3169 partial_output[co++] = '\\';
3170 partial_output[co++] = 'x';
3171 hexd = c / 16 - 0 + '0';
3173 hexd -= '9' - 'a' + 1;
3174 partial_output[co++] = hexd;
3175 hexd = c % 16 - 0 + '0';
3177 hexd -= '9' - 'a' + 1;
3178 partial_output[co++] = hexd;
/* Start a new .STRING before the line gets too long for the HP
   assembler.  */
3181 if (chars_output + co > 243)
3183 fputs ("\"\n\t.STRING \"", file);
3186 fwrite (partial_output, 1, (size_t) co, file);
3190 fputs ("\"\n", file);
3193 /* Try to rewrite floating point comparisons & branches to avoid
3194 useless add,tr insns.
3196 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3197 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3198 first attempt to remove useless add,tr insns. It is zero
3199 for the second pass as reorg sometimes leaves bogus REG_DEAD
3202 When CHECK_NOTES is zero we can only eliminate add,tr insns
3203 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3206 remove_useless_addtr_insns (int check_notes)
3209 static int pass = 0;
3211 /* This is fairly cheap, so always run it when optimizing. */
/* Pass 1: count fcmp and fbranch insns so we know whether the
   1:1-correspondence condition below can be used.  */
3215 int fbranch_count = 0;
3217 /* Walk all the insns in this function looking for fcmp & fbranch
3218 instructions. Keep track of how many of each we find. */
3219 for (insn = get_insns (); insn; insn = next_insn (insn))
3223 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3224 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3227 tmp = PATTERN (insn);
3229 /* It must be a set. */
3230 if (GET_CODE (tmp) != SET)
3233 /* If the destination is CCFP, then we've found an fcmp insn. */
/* CCFP is hard register 0 on this target.  */
3234 tmp = SET_DEST (tmp);
3235 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3241 tmp = PATTERN (insn);
3242 /* If this is an fbranch instruction, bump the fbranch counter. */
3243 if (GET_CODE (tmp) == SET
3244 && SET_DEST (tmp) == pc_rtx
3245 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3246 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3247 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3248 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3256 /* Find all floating point compare + branch insns. If possible,
3257 reverse the comparison & the branch to avoid add,tr insns. */
3258 for (insn = get_insns (); insn; insn = next_insn (insn))
3262 /* Ignore anything that isn't an INSN. */
3263 if (GET_CODE (insn) != INSN)
3266 tmp = PATTERN (insn);
3268 /* It must be a set. */
3269 if (GET_CODE (tmp) != SET)
3272 /* The destination must be CCFP, which is register zero. */
3273 tmp = SET_DEST (tmp);
3274 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3277 /* INSN should be a set of CCFP.
3279 See if the result of this insn is used in a reversed FP
3280 conditional branch. If so, reverse our condition and
3281 the branch. Doing so avoids useless add,tr insns. */
3282 next = next_insn (insn);
3285 /* Jumps, calls and labels stop our search. */
3286 if (GET_CODE (next) == JUMP_INSN
3287 || GET_CODE (next) == CALL_INSN
3288 || GET_CODE (next) == CODE_LABEL)
3291 /* As does another fcmp insn. */
3292 if (GET_CODE (next) == INSN
3293 && GET_CODE (PATTERN (next)) == SET
3294 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3295 && REGNO (SET_DEST (PATTERN (next))) == 0)
3298 next = next_insn (next);
3301 /* Is NEXT_INSN a branch? */
3303 && GET_CODE (next) == JUMP_INSN)
3305 rtx pattern = PATTERN (next);
3307 /* If it a reversed fp conditional branch (eg uses add,tr)
3308 and CCFP dies, then reverse our conditional and the branch
3309 to avoid the add,tr. */
3310 if (GET_CODE (pattern) == SET
3311 && SET_DEST (pattern) == pc_rtx
3312 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3313 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3314 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3315 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3316 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3317 && (fcmp_count == fbranch_count
3319 && find_regno_note (next, REG_DEAD, 0)))
/* Paren balance of the original condition continues on an elided
   line; see the complete source.  */)
3341 /* You may have trouble believing this, but this is the 32 bit HP-PA
3346 Variable arguments (optional; any number may be allocated)
3348 SP-(4*(N+9)) arg word N
3353 Fixed arguments (must be allocated; may remain unused)
3362 SP-32 External Data Pointer (DP)
3364 SP-24 External/stub RP (RP')
3368 SP-8 Calling Stub RP (RP'')
3373 SP-0 Stack Pointer (points to next available address)
3377 /* This function saves registers as follows. Registers marked with ' are
3378 this function's registers (as opposed to the previous function's).
3379 If a frame_pointer isn't needed, r4 is saved as a general register;
3380 the space for the frame pointer is still allocated, though, to keep
3386 SP (FP') Previous FP
3387 SP + 4 Alignment filler (sigh)
3388 SP + 8 Space for locals reserved here.
3392 SP + n All call saved register used.
3396 SP + o All call saved fp registers used.
3400 SP + p (SP') points to next available address.
3404 /* Global variables set by output_function_prologue(). */
3405 /* Size of frame. Need to know this to emit return insns from
/* actual_fsize: total frame size; local_fsize: size of locals portion.
   save_fregs: nonzero if any callee-saved FP registers are saved.  */
3407 static HOST_WIDE_INT actual_fsize, local_fsize;
3408 static int save_fregs;
3410 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3411 Handle case where DISP > 8k by using the add_high_const patterns.
3413 Note in DISP > 8k case, we will leave the high part of the address
3414 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3417 store_reg (int reg, HOST_WIDE_INT disp, int base)
3419 rtx insn, dest, src, basereg;
3421 src = gen_rtx_REG (word_mode, reg);
3422 basereg = gen_rtx_REG (Pmode, base);
/* Case 1: DISP fits in 14 bits -- a single store reaches it.  */
3423 if (VAL_14_BITS_P (disp))
3425 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3426 insn = emit_move_insn (dest, src);
/* Case 2: 64-bit target with DISP outside 32 bits -- materialize the
   full displacement in %r1 and add the base.  */
3428 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3430 rtx delta = GEN_INT (disp);
3431 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3433 emit_move_insn (tmpreg, delta);
3434 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3435 dest = gen_rtx_MEM (word_mode, tmpreg);
3436 insn = emit_move_insn (dest, src);
/* Attach a REG_FRAME_RELATED_EXPR note describing the store as
   BASE+DISP so the unwinder sees the intended address.  */
3440 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3441 gen_rtx_SET (VOIDmode,
3442 gen_rtx_MEM (word_mode,
3443 gen_rtx_PLUS (word_mode, basereg,
/* Case 3: use HIGH/LO_SUM to form the address, leaving the high part
   in %r1 (see header comment).  */
3451 rtx delta = GEN_INT (disp);
3452 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3453 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3455 emit_move_insn (tmpreg, high);
3456 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3457 insn = emit_move_insn (dest, src);
3461 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3462 gen_rtx_SET (VOIDmode,
3463 gen_rtx_MEM (word_mode,
3464 gen_rtx_PLUS (word_mode, basereg,
3472 RTX_FRAME_RELATED_P (insn) = 1;
3475 /* Emit RTL to store REG at the memory location specified by BASE and then
3476 add MOD to BASE. MOD must be <= 8k. */
3479 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3481 rtx insn, basereg, srcreg, delta;
3483 if (!VAL_14_BITS_P (mod))
3486 basereg = gen_rtx_REG (Pmode, base);
3487 srcreg = gen_rtx_REG (word_mode, reg);
3488 delta = GEN_INT (mod);
3490 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3493 RTX_FRAME_RELATED_P (insn) = 1;
3495 /* RTX_FRAME_RELATED_P must be set on each frame related set
3496 in a parallel with more than one element. Don't set
3497 RTX_FRAME_RELATED_P in the first set if reg is temporary
3498 register 1. The effect of this operation is recorded in
3499 the initial copy. */
3502 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3503 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3507 /* The first element of a PARALLEL is always processed if it is
3508 a SET. Thus, we need an expression list for this case. */
/* Describe only the BASE update to the unwinder via
   REG_FRAME_RELATED_EXPR.  */
3510 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3511 gen_rtx_SET (VOIDmode, basereg,
3512 gen_rtx_PLUS (word_mode, basereg, delta)),
3518 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3519 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3520 whether to add a frame note or not.
3522 In the DISP > 8k case, we leave the high part of the address in %r1.
3523 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3526 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
/* Case 1: DISP fits in 14 bits -- single add.  */
3530 if (VAL_14_BITS_P (disp))
3532 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3533 plus_constant (gen_rtx_REG (Pmode, base), disp));
/* Case 2: 64-bit target, DISP outside 32 bits -- load DISP into %r1
   first, then add the base register.  */
3535 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3537 rtx basereg = gen_rtx_REG (Pmode, base);
3538 rtx delta = GEN_INT (disp);
3539 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3541 emit_move_insn (tmpreg, delta);
3542 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3543 gen_rtx_PLUS (Pmode, tmpreg, basereg));
/* Case 3: HIGH/LO_SUM pair; high part of BASE+DISP stays in %r1.  */
3547 rtx basereg = gen_rtx_REG (Pmode, base);
3548 rtx delta = GEN_INT (disp);
3549 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3551 emit_move_insn (tmpreg,
3552 gen_rtx_PLUS (Pmode, basereg,
3553 gen_rtx_HIGH (Pmode, delta)));
3554 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3555 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3558 if (DO_FRAME_NOTES && note)
3559 RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total stack frame size for the current function given
   SIZE bytes of local variables.  Sets *FREGS_LIVE nonzero if any
   callee-saved FP register needs saving (presumably -- the write is on
   an elided line; confirm against the full source).  */
3563 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3568 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3569 be consistent with the rounding and size calculation done here.
3570 Change them at the same time. */
3572 /* We do our own stack alignment. First, round the size of the
3573 stack locals up to a word boundary. */
3574 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3576 /* Space for previous frame pointer + filler. If any frame is
3577 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3578 waste some space here for the sake of HP compatibility. The
3579 first slot is only used when the frame pointer is needed. */
3580 if (size || frame_pointer_needed)
3581 size += STARTING_FRAME_OFFSET;
3583 /* If the current function calls __builtin_eh_return, then we need
3584 to allocate stack space for registers that will hold data for
3585 the exception handler. */
3586 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3590 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3592 size += i * UNITS_PER_WORD;
3595 /* Account for space used by the callee general register saves. */
/* General registers %r3..%r18 (%r4..%r18 when %r3 is the frame
   pointer) each take one word if ever live.  */
3596 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3597 if (regs_ever_live[i])
3598 size += UNITS_PER_WORD;
3600 /* Account for space used by the callee floating point register saves. */
3601 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3602 if (regs_ever_live[i]
3603 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3607 /* We always save both halves of the FP register, so always
3608 increment the frame size by 8 bytes. */
3612 /* If any of the floating registers are saved, account for the
3613 alignment needed for the floating point register save block. */
3616 size = (size + 7) & ~7;
3621 /* The various ABIs include space for the outgoing parameters in the
3622 size of the current function's stack frame. We don't need to align
3623 for the outgoing arguments as their alignment is set by the final
3624 rounding for the frame as a whole. */
3625 size += current_function_outgoing_args_size;
3627 /* Allocate space for the fixed frame marker. This space must be
3628 allocated for any function that makes calls or allocates
3630 if (!current_function_is_leaf || size)
3631 size += TARGET_64BIT ? 48 : 32;
3633 /* Finally, round to the preferred stack boundary. */
3634 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3635 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3638 /* Generate the assembly code for function entry. FILE is a stdio
3639 stream to output the code to. SIZE is an int: how many units of
3640 temporary storage to allocate.
3642 Refer to the array `regs_ever_live' to determine which registers to
3643 save; `regs_ever_live[I]' is nonzero if register number I is ever
3644 used in the function. This function is responsible for knowing
3645 which registers should not be saved even if used. */
3647 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3648 of memory. If any fpu reg is used in the function, we allocate
3649 such a block here, at the bottom of the frame, just in case it's needed.
3651 If this function is a leaf procedure, then we may choose not
3652 to do a "save" insn. The decision about whether or not
3653 to do this is made in regclass.c. */
/* Emit the assembler directives that open a procedure: the function
   label, .PROC, the .CALLINFO frame/save description, and .ENTRY.
   The RTL work was already done by hppa_expand_prologue; this only
   prints directives (no insns are emitted here).  SIZE is unused.  */
3656 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3658 /* The function's label and associated .PROC must never be
3659 separated and must be output *after* any profiling declarations
3660 to avoid changing spaces/subspaces within a procedure. */
3661 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3662 fputs ("\t.PROC\n", file);
3664 /* hppa_expand_prologue does the dirty work now. We just need
3665 to output the assembler directives which denote the start
3667 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3668 if (regs_ever_live[2])
3669 fputs (",CALLS,SAVE_RP", file);
3671 fputs (",NO_CALLS", file);
3673 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3674 at the beginning of the frame and that it is used as the frame
3675 pointer for the frame. We do this because our current frame
3676 layout doesn't conform to that specified in the HP runtime
3677 documentation and we need a way to indicate to programs such as
3678 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3679 isn't used by HP compilers but is supported by the assembler.
3680 However, SAVE_SP is supposed to indicate that the previous stack
3681 pointer has been saved in the frame marker. */
3682 if (frame_pointer_needed)
3683 fputs (",SAVE_SP", file);
3685 /* Pass on information about the number of callee register saves
3686 performed in the prologue.
3688 The compiler is supposed to pass the highest register number
3689 saved, the assembler then has to adjust that number before
3690 entering it into the unwind descriptor (to account for any
3691 caller saved registers with lower register numbers than the
3692 first callee saved register). */
3694 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2)
3697 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3699 fputs ("\n\t.ENTRY\n", file);
3701 remove_useless_addtr_insns (0);
/* Expand the function prologue into RTL: save the return pointer,
   allocate the stack frame (setting up the frame pointer if needed),
   save callee-saved general registers, save EH data registers when
   __builtin_eh_return is used, save the PIC register, and finally save
   callee-saved floating point registers.  The sizes computed here must
   stay in sync with compute_frame_size.  */
3705 hppa_expand_prologue (void)
3707 int merge_sp_adjust_with_store = 0;
3708 HOST_WIDE_INT size = get_frame_size ();
3709 HOST_WIDE_INT offset;
3717 /* Compute total size for frame pointer, filler, locals and rounding to
3718 the next word boundary. Similar code appears in compute_frame_size
3719 and must be changed in tandem with this code. */
3720 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3721 if (local_fsize || frame_pointer_needed)
3722 local_fsize += STARTING_FRAME_OFFSET;
3724 actual_fsize = compute_frame_size (size, &save_fregs);
3726 /* Compute a few things we will use often. */
3727 tmpreg = gen_rtx_REG (word_mode, 1);
3729 /* Save RP first. The calling conventions manual states RP will
3730 always be stored into the caller's frame at sp - 20 or sp - 16
3731 depending on which ABI is in use. */
3732 if (regs_ever_live[2] || current_function_calls_eh_return)
3733 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3735 /* Allocate the local frame and set up the frame pointer if needed. */
3736 if (actual_fsize != 0)
3738 if (frame_pointer_needed)
3740 /* Copy the old frame pointer temporarily into %r1. Set up the
3741 new stack pointer, then store away the saved old frame pointer
3742 into the stack at sp and at the same time update the stack
3743 pointer by actual_fsize bytes. Two versions, first
3744 handles small (<8k) frames. The second handles large (>=8k)
3746 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3749 /* We need to record the frame pointer save here since the
3750 new frame pointer is set in the following insn. */
3751 RTX_FRAME_RELATED_P (insn) = 1;
3753 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3754 gen_rtx_SET (VOIDmode,
3755 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3760 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3762 RTX_FRAME_RELATED_P (insn) = 1;
3764 if (VAL_14_BITS_P (actual_fsize))
3765 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3768 /* It is incorrect to store the saved frame pointer at *sp,
3769 then increment sp (writes beyond the current stack boundary).
3771 So instead use stwm to store at *sp and post-increment the
3772 stack pointer as an atomic operation. Then increment sp to
3773 finish allocating the new frame. */
3774 HOST_WIDE_INT adjust1 = 8192 - 64;
3775 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3777 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3778 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3782 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3783 we need to store the previous stack pointer (frame pointer)
3784 into the frame marker on targets that use the HP unwind
3785 library. This allows the HP unwind library to be used to
3786 unwind GCC frames. However, we are not fully compatible
3787 with the HP library because our frame layout differs from
3788 that specified in the HP runtime specification.
3790 We don't want a frame note on this instruction as the frame
3791 marker moves during dynamic stack allocation.
3793 This instruction also serves as a blockage to prevent
3794 register spills from being scheduled before the stack
3795 pointer is raised. This is necessary as we store
3796 registers using the frame pointer as a base register,
3797 and the frame pointer is set before sp is raised. */
3798 if (TARGET_HPUX_UNWIND_LIBRARY)
3800 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3801 GEN_INT (TARGET_64BIT ? -8 : -4));
3803 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3807 emit_insn (gen_blockage ());
3809 /* no frame pointer needed. */
3812 /* In some cases we can perform the first callee register save
3813 and allocating the stack frame at the same time. If so, just
3814 make a note of it and defer allocating the frame until saving
3815 the callee registers. */
3816 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3817 merge_sp_adjust_with_store = 1;
3818 /* Can not optimize. Adjust the stack frame by actual_fsize
3821 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3826 /* Normal register save.
3828 Do not save the frame pointer in the frame_pointer_needed case. It
3829 was done earlier. */
3830 if (frame_pointer_needed)
3832 offset = local_fsize;
3834 /* Saving the EH return data registers in the frame is the simplest
3835 way to get the frame unwind information emitted. We put them
3836 just before the general registers. */
3837 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3839 unsigned int i, regno;
3843 regno = EH_RETURN_DATA_REGNO (i);
3844 if (regno == INVALID_REGNUM)
3847 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3848 offset += UNITS_PER_WORD;
3852 for (i = 18; i >= 4; i--)
3853 if (regs_ever_live[i] && ! call_used_regs[i])
3855 store_reg (i, offset, FRAME_POINTER_REGNUM);
3856 offset += UNITS_PER_WORD;
3859 /* Account for %r3 which is saved in a special place. */
3862 /* No frame pointer needed. */
3865 offset = local_fsize - actual_fsize;
3867 /* Saving the EH return data registers in the frame is the simplest
3868 way to get the frame unwind information emitted. */
3869 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3871 unsigned int i, regno;
3875 regno = EH_RETURN_DATA_REGNO (i);
3876 if (regno == INVALID_REGNUM)
3879 /* If merge_sp_adjust_with_store is nonzero, then we can
3880 optimize the first save. */
3881 if (merge_sp_adjust_with_store)
3883 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3884 merge_sp_adjust_with_store = 0;
3887 store_reg (regno, offset, STACK_POINTER_REGNUM);
3888 offset += UNITS_PER_WORD;
3892 for (i = 18; i >= 3; i--)
3893 if (regs_ever_live[i] && ! call_used_regs[i])
3895 /* If merge_sp_adjust_with_store is nonzero, then we can
3896 optimize the first GR save. */
3897 if (merge_sp_adjust_with_store)
3899 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3900 merge_sp_adjust_with_store = 0;
3903 store_reg (i, offset, STACK_POINTER_REGNUM);
3904 offset += UNITS_PER_WORD;
3908 /* If we wanted to merge the SP adjustment with a GR save, but we never
3909 did any GR saves, then just emit the adjustment here. */
3910 if (merge_sp_adjust_with_store)
3911 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3915 /* The hppa calling conventions say that %r19, the pic offset
3916 register, is saved at sp - 32 (in this function's frame)
3917 when generating PIC code. FIXME: What is the correct thing
3918 to do for functions which make no calls and allocate no
3919 frame? Do we need to allocate a frame, or can we just omit
3920 the save? For now we'll just omit the save.
3922 We don't want a note on this insn as the frame marker can
3923 move if there is a dynamic stack allocation. */
3924 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3926 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3928 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3932 /* Align pointer properly (doubleword boundary). */
3933 offset = (offset + 7) & ~7;
3935 /* Floating point register store. */
3940 /* First get the frame or stack pointer to the start of the FP register
3942 if (frame_pointer_needed)
3944 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3945 base = frame_pointer_rtx;
3949 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3950 base = stack_pointer_rtx;
3953 /* Now actually save the FP registers.  Each store uses %r1 (tmpreg)
   with post-increment addressing so no per-register offset insn is
   needed. */
3954 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3956 if (regs_ever_live[i]
3957 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3959 rtx addr, insn, reg;
3960 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3961 reg = gen_rtx_REG (DFmode, i);
3962 insn = emit_move_insn (addr, reg);
3965 RTX_FRAME_RELATED_P (insn) = 1;
3968 rtx mem = gen_rtx_MEM (DFmode,
3969 plus_constant (base, offset));
3971 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3972 gen_rtx_SET (VOIDmode, mem, reg),
3977 rtx meml = gen_rtx_MEM (SFmode,
3978 plus_constant (base, offset));
3979 rtx memr = gen_rtx_MEM (SFmode,
3980 plus_constant (base, offset + 4));
3981 rtx regl = gen_rtx_REG (SFmode, i);
3982 rtx regr = gen_rtx_REG (SFmode, i + 1);
3983 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3984 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3987 RTX_FRAME_RELATED_P (setl) = 1;
3988 RTX_FRAME_RELATED_P (setr) = 1;
3989 vec = gen_rtvec (2, setl, setr);
3991 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3992 gen_rtx_SEQUENCE (VOIDmode, vec),
3996 offset += GET_MODE_SIZE (DFmode);
4003 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4004 Handle case where DISP > 8k by using the add_high_const patterns. */
/* Emit RTL to load word-mode register number REG from memory at
   BASE + DISP.  Three strategies depending on the displacement:
   a 14-bit DISP uses a plain base+offset address; a 64-bit DISP that
   does not fit in 32 bits is materialized in %r1 and added (or indexed);
   otherwise the classic HIGH/LO_SUM pair via %r1 is used.  */
4007 load_reg (int reg, HOST_WIDE_INT disp, int base)
4009 rtx dest = gen_rtx_REG (word_mode, reg);
4010 rtx basereg = gen_rtx_REG (Pmode, base);
4013 if (VAL_14_BITS_P (disp))
4014 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4015 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4017 rtx delta = GEN_INT (disp);
4018 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4020 emit_move_insn (tmpreg, delta);
4021 if (TARGET_DISABLE_INDEXING)
4023 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4024 src = gen_rtx_MEM (word_mode, tmpreg);
4027 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4031 rtx delta = GEN_INT (disp);
4032 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4033 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4035 emit_move_insn (tmpreg, high);
4036 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4039 emit_move_insn (dest, src);
4042 /* Update the total code bytes output to the text section. */
/* Add NBYTES to the running total of code bytes emitted to the text
   section.  The total is only maintained when insn addresses are
   available and the function is not in a named section; on overflow
   (or when addresses are unavailable) the total is poisoned to -1
   so later consumers know it is unreliable.  */
4045 update_total_code_bytes (int nbytes)
4047 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4048 && !IN_NAMED_SECTION_P (cfun->decl))
4050 if (INSN_ADDRESSES_SET_P ())
4052 unsigned long old_total = total_code_bytes;
4054 total_code_bytes += nbytes;
4056 /* Be prepared to handle overflows. */
4057 if (old_total > total_code_bytes)
4058 total_code_bytes = -1;
4061 total_code_bytes = -1;
4065 /* This function generates the assembly code for function exit.
4066 Args are as for output_function_prologue ().
4068 The function epilogue should not depend on the current stack
4069 pointer! It should use the frame pointer only. This is mandatory
4070 because of alloca; we also take advantage of it to omit stack
4071 adjustments before returning. */
/* Emit the assembler directives that close a procedure (.EXIT and
   .PROCEND), emitting a nop first when the function ends in a call to
   a non-returning ("volatile") function, and update the running code
   size total.  SIZE is unused; the RTL epilogue was already produced
   by hppa_expand_epilogue.  */
4074 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4076 rtx insn = get_last_insn ();
4080 /* hppa_expand_epilogue does the dirty work now. We just need
4081 to output the assembler directives which denote the end
4084 To make debuggers happy, emit a nop if the epilogue was completely
4085 eliminated due to a volatile call as the last insn in the
4086 current function. That way the return address (in %r2) will
4087 always point to a valid instruction in the current function. */
4089 /* Get the last real insn. */
4090 if (GET_CODE (insn) == NOTE)
4091 insn = prev_real_insn (insn);
4093 /* If it is a sequence, then look inside. */
4094 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4095 insn = XVECEXP (PATTERN (insn), 0, 0);
4097 /* If insn is a CALL_INSN, then it must be a call to a volatile
4098 function (otherwise there would be epilogue insns). */
4099 if (insn && GET_CODE (insn) == CALL_INSN)
4101 fputs ("\tnop\n", file);
4105 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4107 if (INSN_ADDRESSES_SET_P ())
4109 insn = get_last_nonnote_insn ();
4110 last_address += INSN_ADDRESSES (INSN_UID (insn));
4112 last_address += insn_default_length (insn);
4113 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4114 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4117 /* Finally, update the total number of code bytes output so far. */
4118 update_total_code_bytes (last_address);
/* Expand the function epilogue into RTL: restore the return pointer
   (early, to avoid a load/use interlock on the bv), restore callee-saved
   general and floating point registers, deallocate the stack frame, and
   apply the EH stack adjustment when __builtin_eh_return is used.
   Mirrors the layout produced by hppa_expand_prologue.  */
4122 hppa_expand_epilogue (void)
4125 HOST_WIDE_INT offset;
4126 HOST_WIDE_INT ret_off = 0;
4128 int merge_sp_adjust_with_load = 0;
4130 /* We will use this often. */
4131 tmpreg = gen_rtx_REG (word_mode, 1);
4133 /* Try to restore RP early to avoid load/use interlocks when
4134 RP gets used in the return (bv) instruction. This appears to still
4135 be necessary even when we schedule the prologue and epilogue. */
4136 if (regs_ever_live [2] || current_function_calls_eh_return)
4138 ret_off = TARGET_64BIT ? -16 : -20;
4139 if (frame_pointer_needed)
4141 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4146 /* No frame pointer, and stack is smaller than 8k. */
4147 if (VAL_14_BITS_P (ret_off - actual_fsize))
4149 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4155 /* General register restores. */
4156 if (frame_pointer_needed)
4158 offset = local_fsize;
4160 /* If the current function calls __builtin_eh_return, then we need
4161 to restore the saved EH data registers. */
4162 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4164 unsigned int i, regno;
4168 regno = EH_RETURN_DATA_REGNO (i);
4169 if (regno == INVALID_REGNUM)
4172 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4173 offset += UNITS_PER_WORD;
4177 for (i = 18; i >= 4; i--)
4178 if (regs_ever_live[i] && ! call_used_regs[i])
4180 load_reg (i, offset, FRAME_POINTER_REGNUM);
4181 offset += UNITS_PER_WORD;
4186 offset = local_fsize - actual_fsize;
4188 /* If the current function calls __builtin_eh_return, then we need
4189 to restore the saved EH data registers. */
4190 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4192 unsigned int i, regno;
4196 regno = EH_RETURN_DATA_REGNO (i);
4197 if (regno == INVALID_REGNUM)
4200 /* Only for the first load.
4201 merge_sp_adjust_with_load holds the register load
4202 with which we will merge the sp adjustment. */
4203 if (merge_sp_adjust_with_load == 0
4205 && VAL_14_BITS_P (-actual_fsize))
4206 merge_sp_adjust_with_load = regno;
4208 load_reg (regno, offset, STACK_POINTER_REGNUM);
4209 offset += UNITS_PER_WORD;
4213 for (i = 18; i >= 3; i--)
4215 if (regs_ever_live[i] && ! call_used_regs[i])
4217 /* Only for the first load.
4218 merge_sp_adjust_with_load holds the register load
4219 with which we will merge the sp adjustment. */
4220 if (merge_sp_adjust_with_load == 0
4222 && VAL_14_BITS_P (-actual_fsize))
4223 merge_sp_adjust_with_load = i;
4225 load_reg (i, offset, STACK_POINTER_REGNUM);
4226 offset += UNITS_PER_WORD;
4231 /* Align pointer properly (doubleword boundary). */
4232 offset = (offset + 7) & ~7;
4234 /* FP register restores. */
4237 /* Adjust the register to index off of. */
4238 if (frame_pointer_needed)
4239 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4241 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4243 /* Actually do the restores now.  %r1 (tmpreg) is post-incremented
   by each load, matching the prologue's save order. */
4244 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4245 if (regs_ever_live[i]
4246 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4248 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4249 rtx dest = gen_rtx_REG (DFmode, i);
4250 emit_move_insn (dest, src);
4254 /* Emit a blockage insn here to keep these insns from being moved to
4255 an earlier spot in the epilogue, or into the main instruction stream.
4257 This is necessary as we must not cut the stack back before all the
4258 restores are finished. */
4259 emit_insn (gen_blockage ());
4261 /* Reset stack pointer (and possibly frame pointer). The stack
4262 pointer is initially set to fp + 64 to avoid a race condition. */
4263 if (frame_pointer_needed)
4265 rtx delta = GEN_INT (-64);
4267 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4268 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4270 /* If we were deferring a callee register restore, do it now. */
4271 else if (merge_sp_adjust_with_load)
4273 rtx delta = GEN_INT (-actual_fsize);
4274 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4276 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4278 else if (actual_fsize != 0)
4279 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4282 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4283 frame greater than 8k), do so now. */
4285 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4287 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4289 rtx sa = EH_RETURN_STACKADJ_RTX;
4291 emit_insn (gen_blockage ());
4292 emit_insn (TARGET_64BIT
4293 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4294 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4299 hppa_pic_save_rtx (void)
4301 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
/* Emit the profiling call to _mcount for profile counter LABEL_NO.
   Loads the function's begin-prologue address into %r25 via a
   PC-relative sequence, passes the caller's return pointer in %r26,
   and (unless NO_PROFILE_COUNTERS) passes the counter address in %r24.  */
4305 hppa_profile_hook (int label_no)
4307 /* We use SImode for the address of the function in both 32 and
4308 64-bit code to avoid having to provide DImode versions of the
4309 lcla2 and load_offset_label_address insn patterns. */
4310 rtx reg = gen_reg_rtx (SImode);
4311 rtx label_rtx = gen_label_rtx ();
4312 rtx begin_label_rtx, call_insn;
4313 char begin_label_name[16];
4315 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4317 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4320 emit_move_insn (arg_pointer_rtx,
4321 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4324 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4326 /* The address of the function is loaded into %r25 with an instruction-
4327 relative sequence that avoids the use of relocations. The sequence
4328 is split so that the load_offset_label_address instruction can
4329 occupy the delay slot of the call to _mcount. */
4331 emit_insn (gen_lcla2 (reg, label_rtx));
4333 emit_insn (gen_lcla1 (reg, label_rtx));
4335 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4336 reg, begin_label_rtx, label_rtx));
4338 #ifndef NO_PROFILE_COUNTERS
4340 rtx count_label_rtx, addr, r24;
4341 char count_label_name[16];
4343 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4344 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4346 addr = force_reg (Pmode, count_label_rtx);
4347 r24 = gen_rtx_REG (Pmode, 24);
4348 emit_move_insn (r24, addr);
4351 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4352 gen_rtx_SYMBOL_REF (Pmode,
4354 GEN_INT (TARGET_64BIT ? 24 : 12)));
4356 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4361 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4362 gen_rtx_SYMBOL_REF (Pmode,
4364 GEN_INT (TARGET_64BIT ? 16 : 8)));
4368 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4369 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4371 /* Indicate the _mcount call cannot throw, nor will it execute a
4373 REG_NOTES (call_insn)
4374 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4377 /* Fetch the return address for the frame COUNT steps up from
4378 the current frame, after the prologue. FRAMEADDR is the
4379 frame pointer of the COUNT frame.
4381 We want to ignore any export stub remnants here. To handle this,
4382 we examine the code at the return address, and if it is an export
4383 stub, we return a memory rtx for the stub return address stored
4386 The value returned is used in two different ways:
4388 1. To find a function's caller.
4390 2. To change the return address for a function.
4392 This function handles most instances of case 1; however, it will
4393 fail if there are two levels of stubs to execute on the return
4394 path. The only way I believe that can happen is if the return value
4395 needs a parameter relocation, which never happens for C code.
4397 This function handles most instances of case 2; however, it will
4398 fail if we did not originally have stub code on the return path
4399 but will need stub code on the new return path. This can happen if
4400 the caller & callee are both in the main program, but the new
4401 return location is in a shared library. */
/* Return an rtx for the return address of the frame COUNT levels up;
   FRAMEADDR is that frame's frame pointer.  Emits a run-time check of
   the four words at the (privilege-masked) return address against the
   known export-stub instruction sequence; when a stub is detected the
   real return address is fetched from -24[frameaddr] instead.  See the
   block comment preceding this function for the full rationale.  */
4404 return_addr_rtx (int count, rtx frameaddr)
4414 rp = get_hard_reg_initial_val (Pmode, 2);
4416 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4419 saved_rp = gen_reg_rtx (Pmode);
4420 emit_move_insn (saved_rp, rp);
4422 /* Get pointer to the instruction stream. We have to mask out the
4423 privilege level from the two low order bits of the return address
4424 pointer here so that ins will point to the start of the first
4425 instruction that would have been executed if we returned. */
4426 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4427 label = gen_label_rtx ();
4429 /* Check the instruction stream at the normal return address for the
4432 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4433 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4434 0x00011820 | stub+16: mtsp r1,sr0
4435 0xe0400002 | stub+20: be,n 0(sr0,rp)
4437 If it is an export stub, then our return address is really in
4440 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4441 NULL_RTX, SImode, 1);
4442 emit_jump_insn (gen_bne (label));
4444 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4445 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4446 emit_jump_insn (gen_bne (label));
4448 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4449 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4450 emit_jump_insn (gen_bne (label));
4452 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4453 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4455 /* If there is no export stub then just use the value saved from
4456 the return pointer register. */
4458 emit_jump_insn (gen_bne (label));
4460 /* Here we know that our return address points to an export
4461 stub. We don't want to return the address of the export stub,
4462 but rather the return address of the export stub. That return
4463 address is stored at -24[frameaddr]. */
4465 emit_move_insn (saved_rp,
4467 memory_address (Pmode,
4468 plus_constant (frameaddr,
4475 /* This is only valid once reload has completed because it depends on
4476 knowing exactly how much (if any) frame there is and...
4478 It's only valid if there is no frame marker to de-allocate and...
4480 It's only valid if %r2 hasn't been saved into the caller's frame
4481 (we're not profiling and %r2 isn't live anywhere). */
4483 hppa_can_use_return_insn_p (void)
4485 return (reload_completed
4486 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4487 && ! regs_ever_live[2]
4488 && ! frame_pointer_needed);
4492 emit_bcond_fp (enum rtx_code code, rtx operand0)
4494 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4495 gen_rtx_IF_THEN_ELSE (VOIDmode,
4496 gen_rtx_fmt_ee (code,
4498 gen_rtx_REG (CCFPmode, 0),
4500 gen_rtx_LABEL_REF (VOIDmode, operand0),
4506 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4508 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4509 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4512 /* Adjust the cost of a scheduling dependency. Return the new cost of
4513 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* TARGET_SCHED_ADJUST_COST hook: return the adjusted cost of the
   dependency LINK between INSN and DEP_INSN (current cost COST).
   Only anti and output dependencies involving FP loads and FP ALU ops
   are adjusted; true dependencies are handled by DFA bypasses, and the
   PA8000 needs no adjustment at all.  */
4516 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4518 enum attr_type attr_type;
4520 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4521 true dependencies as they are described with bypasses now. */
4522 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4525 if (! recog_memoized (insn))
4528 attr_type = get_attr_type (insn);
4530 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4532 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4535 if (attr_type == TYPE_FPLOAD)
4537 rtx pat = PATTERN (insn);
4538 rtx dep_pat = PATTERN (dep_insn);
4539 if (GET_CODE (pat) == PARALLEL)
4541 /* This happens for the fldXs,mb patterns. */
4542 pat = XVECEXP (pat, 0, 0);
4544 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4545 /* If this happens, we have to extend this to schedule
4546 optimally. Return 0 for now. */
4549 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4551 if (! recog_memoized (dep_insn))
4553 switch (get_attr_type (dep_insn))
4560 case TYPE_FPSQRTSGL:
4561 case TYPE_FPSQRTDBL:
4562 /* A fpload can't be issued until one cycle before a
4563 preceding arithmetic operation has finished if
4564 the target of the fpload is any of the sources
4565 (or destination) of the arithmetic operation. */
4566 return insn_default_latency (dep_insn) - 1;
4573 else if (attr_type == TYPE_FPALU)
4575 rtx pat = PATTERN (insn);
4576 rtx dep_pat = PATTERN (dep_insn);
4577 if (GET_CODE (pat) == PARALLEL)
4579 /* This happens for the fldXs,mb patterns. */
4580 pat = XVECEXP (pat, 0, 0);
4582 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4583 /* If this happens, we have to extend this to schedule
4584 optimally. Return 0 for now. */
4587 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4589 if (! recog_memoized (dep_insn))
4591 switch (get_attr_type (dep_insn))
4595 case TYPE_FPSQRTSGL:
4596 case TYPE_FPSQRTDBL:
4597 /* An ALU flop can't be issued until two cycles before a
4598 preceding divide or sqrt operation has finished if
4599 the target of the ALU flop is any of the sources
4600 (or destination) of the divide or sqrt operation. */
4601 return insn_default_latency (dep_insn) - 2;
4609 /* For other anti dependencies, the cost is 0. */
4612 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4614 /* Output dependency; DEP_INSN writes a register that INSN writes some
4616 if (attr_type == TYPE_FPLOAD)
4618 rtx pat = PATTERN (insn);
4619 rtx dep_pat = PATTERN (dep_insn);
4620 if (GET_CODE (pat) == PARALLEL)
4622 /* This happens for the fldXs,mb patterns. */
4623 pat = XVECEXP (pat, 0, 0);
4625 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4626 /* If this happens, we have to extend this to schedule
4627 optimally. Return 0 for now. */
4630 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4632 if (! recog_memoized (dep_insn))
4634 switch (get_attr_type (dep_insn))
4641 case TYPE_FPSQRTSGL:
4642 case TYPE_FPSQRTDBL:
4643 /* A fpload can't be issued until one cycle before a
4644 preceding arithmetic operation has finished if
4645 the target of the fpload is the destination of the
4646 arithmetic operation.
4648 Exception: For PA7100LC, PA7200 and PA7300, the cost
4649 is 3 cycles, unless they bundle together. We also
4650 pay the penalty if the second insn is a fpload. */
4651 return insn_default_latency (dep_insn) - 1;
4658 else if (attr_type == TYPE_FPALU)
4660 rtx pat = PATTERN (insn);
4661 rtx dep_pat = PATTERN (dep_insn);
4662 if (GET_CODE (pat) == PARALLEL)
4664 /* This happens for the fldXs,mb patterns. */
4665 pat = XVECEXP (pat, 0, 0);
4667 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4668 /* If this happens, we have to extend this to schedule
4669 optimally. Return 0 for now. */
4672 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4674 if (! recog_memoized (dep_insn))
4676 switch (get_attr_type (dep_insn))
4680 case TYPE_FPSQRTSGL:
4681 case TYPE_FPSQRTDBL:
4682 /* An ALU flop can't be issued until two cycles before a
4683 preceding divide or sqrt operation has finished if
4684 the target of the ALU flop is also the target of
4685 the divide or sqrt operation. */
4686 return insn_default_latency (dep_insn) - 2;
4694 /* For other output dependencies, the cost is 0. */
4701 /* Adjust scheduling priorities. We use this to try and keep addil
4702 and the next use of %r1 close together. */
/* TARGET_SCHED_ADJUST_PRIORITY hook: adjust the scheduling PRIORITY of
   INSN.  Recognizes single-set insns whose source or destination uses a
   LO_SUM of a writable symbolic operand -- i.e. the consumer of an addil
   result in %r1 -- so that addil and the use of %r1 stay close together.
   NOTE(review): the priority bump itself falls in elided lines; only the
   classification tests are visible here.  */
4704 pa_adjust_priority (rtx insn, int priority)
4706 rtx set = single_set (insn);
4710 src = SET_SRC (set);
4711 dest = SET_DEST (set);
4712 if (GET_CODE (src) == LO_SUM
4713 && symbolic_operand (XEXP (src, 1), VOIDmode)
4714 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4717 else if (GET_CODE (src) == MEM
4718 && GET_CODE (XEXP (src, 0)) == LO_SUM
4719 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4720 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4723 else if (GET_CODE (dest) == MEM
4724 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4725 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4726 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4732 /* The 700 can only issue a single insn at a time.
4733 The 7XXX processors can issue two insns at a time.
4734 The 8000 can issue 4 insns at a time. */
4736 pa_issue_rate (void)
4740 case PROCESSOR_700: return 1;
4741 case PROCESSOR_7100: return 2;
4742 case PROCESSOR_7100LC: return 2;
4743 case PROCESSOR_7200: return 2;
4744 case PROCESSOR_7300: return 2;
4745 case PROCESSOR_8000: return 4;
4754 /* Return any length adjustment needed by INSN which already has its length
4755 computed as LENGTH. Return zero if no adjustment is necessary.
4757 For the PA: function calls, millicode calls, and backwards short
4758 conditional branches with unfilled delay slots need an adjustment by +1
4759 (to account for the NOP which will be inserted into the instruction stream).
4761 Also compute the length of an inline block move here as it is too
4762 complicated to express as a length attribute in pa.md. */
/* Returns a byte adjustment (usually +4 for an inserted nop, or a
   recomputed length for block move/clear) to be added to LENGTH for
   INSN.  Several branch arms' return statements are elided from this
   listing; only the classification tests are visible.  */
4764 pa_adjust_insn_length (rtx insn, int length)
4766   rtx pat = PATTERN (insn);
4768   /* Jumps inside switch tables which have unfilled delay slots need
4770   if (GET_CODE (insn) == JUMP_INSN
4771       && GET_CODE (pat) == PARALLEL
4772       && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4774   /* Millicode insn with an unfilled delay slot.  */
4775   else if (GET_CODE (insn) == INSN
4776 	   && GET_CODE (pat) != SEQUENCE
4777 	   && GET_CODE (pat) != USE
4778 	   && GET_CODE (pat) != CLOBBER
4779 	   && get_attr_type (insn) == TYPE_MILLI)
4781   /* Block move pattern.  */
4782   else if (GET_CODE (insn) == INSN
4783 	   && GET_CODE (pat) == PARALLEL
4784 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4785 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4786 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4787 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4788 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
/* The movstr pattern's base length is 4; return the delta.  */
4789     return compute_movstr_length (insn) - 4;
4790   /* Block clear pattern.  */
4791   else if (GET_CODE (insn) == INSN
4792 	   && GET_CODE (pat) == PARALLEL
4793 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4794 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4795 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4796 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4797     return compute_clrstr_length (insn) - 4;
4798   /* Conditional branch with an unfilled delay slot.  */
4799   else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4801       /* Adjust a short backwards conditional with an unfilled delay slot.  */
4802       if (GET_CODE (pat) == SET
4804 	  && ! forward_branch_p (insn))
4806       else if (GET_CODE (pat) == PARALLEL
4807 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4810       /* Adjust dbra insn with short backwards conditional branch with
4811 	 unfilled delay slot -- only for case where counter is in a
4812 	 general register.  */
4813       else if (GET_CODE (pat) == PARALLEL
4814 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4815 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4816 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4818 	       && ! forward_branch_p (insn))
4826 /* Print operand X (an rtx) in assembler syntax to file FILE.
4827 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4828 For `%' followed by punctuation, CODE is the punctuation and X is null. */
/* Assembler-output workhorse: prints operand X to FILE according to the
   single-character CODE from the template (see the comment above).
   NOTE(review): many case labels, break statements, and the enclosing
   switch structure are elided from this listing — the comments below
   describe only the visible fragments.  */
4831 print_operand (FILE *file, rtx x, int code)
/* '%#': nop for an empty delay slot.  */
4836       /* Output a 'nop' if there's nothing for the delay slot.  */
4837       if (dbr_sequence_length () == 0)
4838 	fputs ("\n\tnop", file);
/* '%*': ",n" nullification completer when the delay slot is empty or
   the branch is annulled.  */
4841       /* Output a nullification completer if there's nothing for the */
4842       /* delay slot or nullification is requested.  */
4843       if (dbr_sequence_length () == 0 ||
4845 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4849       /* Print out the second register name of a register pair.
4850 	 I.e., R (6) => 7.  */
4851       fputs (reg_names[REGNO (x) + 1], file);
4854       /* A register or zero.  */
4856 	  || (x == CONST0_RTX (DFmode))
4857 	  || (x == CONST0_RTX (SFmode)))
4859 	  fputs ("%r0", file);
4865       /* A register or zero (floating point).  */
4867 	  || (x == CONST0_RTX (DFmode))
4868 	  || (x == CONST0_RTX (SFmode)))
4870 	  fputs ("%fr0", file);
/* Symbolic address plus base register: "sym(reg)".  */
4879 	    xoperands[0] = XEXP (XEXP (x, 0), 0);
4880 	    xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4881 	    output_global_address (file, xoperands[1], 0);
4882 	    fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
/* The four condition variants below print the PA condition-completer
   text for an integer comparison code: plain, negated, swapped, and
   both swapped and negated.  */
4886     case 'C':			/* Plain (C)ondition */
4888       switch (GET_CODE (x))
4891 	  fputs ("=", file);  break;
4893 	  fputs ("<>", file);  break;
4895 	  fputs (">", file);  break;
4897 	  fputs (">=", file);  break;
4899 	  fputs (">>=", file);  break;
4901 	  fputs (">>", file);  break;
4903 	  fputs ("<", file);  break;
4905 	  fputs ("<=", file);  break;
4907 	  fputs ("<<=", file);  break;
4909 	  fputs ("<<", file);  break;
4914     case 'N':			/* Condition, (N)egated */
4915       switch (GET_CODE (x))
4918 	  fputs ("<>", file);  break;
4920 	  fputs ("=", file);  break;
4922 	  fputs ("<=", file);  break;
4924 	  fputs ("<", file);  break;
4926 	  fputs ("<<", file);  break;
4928 	  fputs ("<<=", file);  break;
4930 	  fputs (">=", file);  break;
4932 	  fputs (">", file);  break;
4934 	  fputs (">>", file);  break;
4936 	  fputs (">>=", file);  break;
4941       /* For floating point comparisons.  Note that the output
4942 	 predicates are the complement of the desired mode.  */
4944       switch (GET_CODE (x))
4947 	  fputs ("!=", file);  break;
4949 	  fputs ("=", file);  break;
4951 	  fputs ("!>", file);  break;
4953 	  fputs ("!>=", file);  break;
4955 	  fputs ("!<", file);  break;
4957 	  fputs ("!<=", file);  break;
4959 	  fputs ("!<>", file);  break;
4961 	  fputs (">", file);  break;
4963 	  fputs (">=", file);  break;
4965 	  fputs ("<", file);  break;
4967 	  fputs ("<=", file);  break;
4969 	  fputs ("<>", file);  break;
4971 	  fputs ("<=>", file);  break;
4973 	  fputs ("!<=>", file);  break;
4978     case 'S':			/* Condition, operands are (S)wapped.  */
4979       switch (GET_CODE (x))
4982 	  fputs ("=", file);  break;
4984 	  fputs ("<>", file);  break;
4986 	  fputs ("<", file);  break;
4988 	  fputs ("<=", file);  break;
4990 	  fputs ("<<=", file);  break;
4992 	  fputs ("<<", file);  break;
4994 	  fputs (">", file);  break;
4996 	  fputs (">=", file);  break;
4998 	  fputs (">>=", file);  break;
5000 	  fputs (">>", file);  break;
5005     case 'B':			/* Condition, (B)oth swapped and negate.  */
5006       switch (GET_CODE (x))
5009 	  fputs ("<>", file);  break;
5011 	  fputs ("=", file);  break;
5013 	  fputs (">=", file);  break;
5015 	  fputs (">", file);  break;
5017 	  fputs (">>", file);  break;
5019 	  fputs (">>=", file);  break;
5021 	  fputs ("<=", file);  break;
5023 	  fputs ("<", file);  break;
5025 	  fputs ("<<", file);  break;
5027 	  fputs ("<<=", file);  break;
/* Integer-constant transforms: one's complement, shift-count and
   bit-position variants for 64- and 32-bit deposit/extract insns.  */
5033       if (GET_CODE (x) == CONST_INT)
5035 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5040       if (GET_CODE (x) == CONST_INT)
5042 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5047       if (GET_CODE (x) == CONST_INT)
5049 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5054       if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
5056 	  fprintf (file, "%d", exact_log2 (INTVAL (x)));
5061       if (GET_CODE (x) == CONST_INT)
5063 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5068       if (GET_CODE (x) == CONST_INT)
5070 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5075       if (GET_CODE (x) == CONST_INT)
/* Memory-operand completers: pre/post modify and indexed forms.  The
   spelling differs between the two assembler dialects.  */
5080       switch (GET_CODE (XEXP (x, 0)))
5084 	  if (ASSEMBLER_DIALECT == 0)
5085 	    fputs ("s,mb", file);
5087 	    fputs (",mb", file);
5091 	  if (ASSEMBLER_DIALECT == 0)
5092 	    fputs ("s,ma", file);
5094 	    fputs (",ma", file);
5097 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5098 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5100 	      if (ASSEMBLER_DIALECT == 0)
5103 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5104 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5106 	      if (ASSEMBLER_DIALECT == 0)
5107 		fputs ("x,s", file);
5111 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5115 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
/* 'G'/'H': global addresses, optionally rounded (see
   output_global_address below).  */
5121       output_global_address (file, x, 0);
5124       output_global_address (file, x, 1);
5126     case 0:			/* Don't do anything special */
/* zdepwi/zdepdi operand triples computed from a constant.  */
5131 	compute_zdepwi_operands (INTVAL (x), op);
5132 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5138 	compute_zdepdi_operands (INTVAL (x), op);
5139 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5143       /* We can get here from a .vtable_inherit due to our
5144 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Fallthrough default: registers, memory operands, then constants.  */
5150   if (GET_CODE (x) == REG)
5152       fputs (reg_names [REGNO (x)], file);
5153       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5159 	       && GET_MODE_SIZE (GET_MODE (x)) <= 4
5160 	       && (REGNO (x) & 1) == 0)
5163   else if (GET_CODE (x) == MEM)
5165       int size = GET_MODE_SIZE (GET_MODE (x));
5166       rtx base = NULL_RTX;
5167       switch (GET_CODE (XEXP (x, 0)))
/* Pre-decrement / pre-increment addressing: offset by the access size.  */
5171 	  base = XEXP (XEXP (x, 0), 0);
5172 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5176 	  base = XEXP (XEXP (x, 0), 0);
5177 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5180 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5181 	    fprintf (file, "%s(%s)",
5182 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5183 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5184 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5185 	    fprintf (file, "%s(%s)",
5186 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5187 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5188 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5189 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5191 	      /* Because the REG_POINTER flag can get lost during reload,
5192 		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5193 		 index and base registers in the combined move patterns.  */
5194 	      rtx base = XEXP (XEXP (x, 0), 1);
5195 	      rtx index = XEXP (XEXP (x, 0), 0);
5197 	      fprintf (file, "%s(%s)",
5198 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5201 	      output_address (XEXP (x, 0));
5204 	  output_address (XEXP (x, 0));
/* Anything else: a constant expression.  */
5209     output_addr_const (file, x);
5212 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
/* Emits X (a SYMBOL_REF, or a CONST of symbol +/- offset) in assembler
   syntax.  Non-read-only symbols get a "-$global$" suffix in non-PIC
   code.  ROUND_CONSTANT requests rounding of the offset for LR field
   selectors (see the long comment at line 5253 below).  */
5215 output_global_address (FILE *file, rtx x, int round_constant)
5218   /* Imagine (high (const (plus ...))).  */
5219   if (GET_CODE (x) == HIGH)
5222   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5223     assemble_name (file, XSTR (x, 0));
5224   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5226       assemble_name (file, XSTR (x, 0));
5227       fputs ("-$global$", file);
5229   else if (GET_CODE (x) == CONST)
5231       const char *sep = "";
5232       int offset = 0;		/* assembler wants -$global$ at end */
5233       rtx base = NULL_RTX;
/* First operand of the PLUS/MINUS: either the symbol or the offset.  */
5235       if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5237 	  base = XEXP (XEXP (x, 0), 0);
5238 	  output_addr_const (file, base);
5240       else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
5241 	offset = INTVAL (XEXP (XEXP (x, 0), 0));
/* Second operand, same two possibilities.  */
5244       if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
5246 	  base = XEXP (XEXP (x, 0), 1);
5247 	  output_addr_const (file, base);
5249       else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
5250 	offset = INTVAL (XEXP (XEXP (x, 0), 1));
5253       /* How bogus.  The compiler is apparently responsible for
5254 	 rounding the constant if it uses an LR field selector.
5256 	 The linker and/or assembler seem a better place since
5257 	 they have to do this kind of thing already.
5259 	 If we fail to do this, HP's optimizing linker may eliminate
5260 	 an addil, but not update the ldw/stw/ldo instruction that
5261 	 uses the result of the addil.  */
/* Round to a multiple of 8k, as the LR field selector does.  */
5263 	offset = ((offset + 0x1000) & ~0x1fff);
5265       if (GET_CODE (XEXP (x, 0)) == PLUS)
5275       else if (GET_CODE (XEXP (x, 0)) == MINUS
5276 	       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5280       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5281 	fputs ("-$global$", file);
5283 	fprintf (file, "%s%d", sep, offset);
/* Not HIGH/SYMBOL_REF/CONST: fall back to the generic printer.  */
5286     output_addr_const (file, x);
5289 /* Output boilerplate text to appear at the beginning of the file.
5290 There are several possible versions. */
5291 #define aputs(x) fputs(x, asm_out_file)
/* File-start boilerplate helpers (see comment above).  Each target
   flavor's TARGET_ASM_FILE_START implementation composes these.  */

/* Emit the .LEVEL directive matching the selected architecture.  */
5293 pa_file_start_level (void)
5296     aputs ("\t.LEVEL 2.0w\n");
5297   else if (TARGET_PA_20)
5298     aputs ("\t.LEVEL 2.0\n");
5299   else if (TARGET_PA_11)
5300     aputs ("\t.LEVEL 1.1\n");
5302     aputs ("\t.LEVEL 1.0\n");

/* Emit the SOM .SPACE/.SUBSPA declarations; SORTSPACE selects the
   sorted $BSS$ variant (branch structure partly elided in listing).  */
5306 pa_file_start_space (int sortspace)
5308   aputs ("\t.SPACE $PRIVATE$");
5311   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5312 	 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5313 	 "\n\t.SPACE $TEXT$");
5316   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5317 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");

/* Emit a .file directive when debugging; WANT_VERSION additionally
   emits a .version directive.  */
5321 pa_file_start_file (int want_version)
5323   if (write_symbols != NO_DEBUG)
5325       output_file_directive (asm_out_file, main_input_filename);
5327 	aputs ("\t.version\t\"01.01\"\n");

/* Import _mcount with the given symbol kind when profiling.  */
5332 pa_file_start_mcount (const char *aswhat)
5335     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);

/* TARGET_ASM_FILE_START for the generic ELF target.  */
5339 pa_elf_file_start (void)
5341   pa_file_start_level ();
5342   pa_file_start_mcount ("ENTRY");
5343   pa_file_start_file (0);

/* TARGET_ASM_FILE_START for SOM: also imports $global$ and the
   $$dyncall millicode routine.  */
5347 pa_som_file_start (void)
5349   pa_file_start_level ();
5350   pa_file_start_space (0);
5351   aputs ("\t.IMPORT $global$,DATA\n"
5352 	 "\t.IMPORT $$dyncall,MILLICODE\n");
5353   pa_file_start_mcount ("CODE");
5354   pa_file_start_file (0);

/* TARGET_ASM_FILE_START for Linux.  */
5358 pa_linux_file_start (void)
5360   pa_file_start_file (1);
5361   pa_file_start_level ();
5362   pa_file_start_mcount ("CODE");

/* TARGET_ASM_FILE_START for 64-bit HP-UX with GNU as.  */
5366 pa_hpux64_gas_file_start (void)
5368   pa_file_start_level ();
5369 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5371       ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5373   pa_file_start_file (1);

/* TARGET_ASM_FILE_START for 64-bit HP-UX with the HP assembler.  */
5377 pa_hpux64_hpas_file_start (void)
5379   pa_file_start_level ();
5380   pa_file_start_space (1);
5381   pa_file_start_mcount ("CODE");
5382   pa_file_start_file (0);
/* Look up (or create) the deferred-plabel entry for function FNAME.
   The returned entry's internal_label is the local label later emitted
   by output_deferred_plabels.  Entries are GC-allocated.  */
5386 static struct deferred_plabel *
5387 get_plabel (const char *fname)
5391   /* See if we have already put this function on the list of deferred
5392      plabels.  This list is generally small, so a linear search is not
5393      too ugly.  If it proves too slow replace it with something faster.  */
5394   for (i = 0; i < n_deferred_plabels; i++)
5395     if (strcmp (fname, deferred_plabels[i].name) == 0)
5398   /* If the deferred plabel list is empty, or this entry was not found
5399      on the list, create a new entry on the list.  */
5400   if (deferred_plabels == NULL || i == n_deferred_plabels)
5402       const char *real_name;
/* First allocation vs. growth of the existing array.  */
5404       if (deferred_plabels == 0)
5405 	deferred_plabels =  (struct deferred_plabel *)
5406 	  ggc_alloc (sizeof (struct deferred_plabel));
5408 	deferred_plabels = (struct deferred_plabel *)
5409 	  ggc_realloc (deferred_plabels,
5410 		       ((n_deferred_plabels + 1)
5411 			* sizeof (struct deferred_plabel)));
5413       i = n_deferred_plabels++;
5414       deferred_plabels[i].internal_label = gen_label_rtx ();
5415       deferred_plabels[i].name = ggc_strdup (fname);
5417       /* Gross.  We have just implicitly taken the address of this function,
/* Mark the identifier referenced so the function is not discarded.  */
5419       real_name = (*targetm.strip_name_encoding) (fname);
5420       TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5423   return &deferred_plabels[i];
/* TARGET_ASM_FILE_END worker: emit one pointer-sized word per deferred
   plabel (internal label followed by the function's symbol).  */
5427 output_deferred_plabels (void)
5430   /* If we have deferred plabels, then we need to switch into the data
5431      section and align it to a 4 byte boundary before we output the
5432      deferred plabels.  */
5433   if (n_deferred_plabels)
/* 8-byte alignment for the 64-bit runtime, 4-byte otherwise.  */
5436       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5439   /* Now output the deferred plabels.  */
5440   for (i = 0; i < n_deferred_plabels; i++)
5442       (*targetm.asm_out.internal_label) (asm_out_file, "L",
5443 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5444       assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5445 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5449 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5450 /* Initialize optabs to point to HPUX long double emulation routines. */
/* TARGET_INIT_LIBFUNCS: route TFmode (quad float) arithmetic,
   comparisons, and conversions to HP-UX's _U_Q* emulation library.  */
5452 pa_hpux_init_libfuncs (void)
5454   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5455   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5456   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5457   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
/* NOTE(review): "_U_Qmin" lacks the 'f' that "_U_Qfmax" has; this
   matches historical HP-UX library naming, but verify against the
   HP-UX math library documentation before changing.  */
5458   set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5459   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5460   set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5461   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5462   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5464   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5465   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5466   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5467   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5468   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5469   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5470   set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5472   set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5473   set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5474   set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5475   set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
/* The 64-bit runtime's quad-to-int routine carries a double leading
   underscore.  */
5477   set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5478 					        ? "__U_Qfcnvfxt_quad_to_sgl"
5479 					        : "_U_Qfcnvfxt_quad_to_sgl");
5480   set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5481   set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5482   set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5484   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5485   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5489 /* HP's millicode routines mean something special to the assembler.
5490    Keep track of which ones we have used.  */
5492 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5493 static void import_milli (enum millicodes);
/* One "already imported" flag per millicode routine.  */
5494 static char imported[(int) end1000];
5495 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5496 static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." placeholder within import_string.  */
5497 #define MILLI_START 10

/* Emit the .IMPORT directive for CODE once per compilation unit, by
   splicing the 4-character routine name into the template above.  */
5500 import_milli (enum millicodes code)
5502   char str[sizeof (import_string)];
5504   if (!imported[(int) code])
5506       imported[(int) code] = 1;
5507       strcpy (str, import_string);
5508       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5509       output_asm_insn (str, 0);

5513 /* The register constraints have put the operands and return value in
5514    the proper registers.  */

/* Output a call to the $$mulI multiply millicode routine.  */
5517 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5519   import_milli (mulI);
5520   return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5523 /* Emit the rtl for doing a division by a constant. */
5525 /* Do magic division millicodes exist for this value? */
/* Table indexed by divisor: nonzero when a $$divI_<n>/$$divU_<n>
   magic-constant millicode routine exists (tail elided in listing).  */
5526 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5529 /* We'll use an array to keep track of the magic millicodes and
5530    whether or not we've used them already.  [n][0] is signed, [n][1] is
5533 static int div_milli[16][2];

/* Predicate: OP is a valid operand for the millicode divide patterns —
   either the dividend register %r25 or a small constant with a magic
   millicode routine.  */
5536 div_operand (rtx op, enum machine_mode mode)
5538   return (mode == SImode
5539 	  && ((GET_CODE (op) == REG && REGNO (op) == 25)
5540 	      || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5541 		  && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));

/* If a magic millicode exists for the constant divisor in operands[2],
   emit the millicode-call RTL (clobbering the caller-saved registers
   the millicode convention uses) and return nonzero; otherwise return 0
   so the caller falls back to the generic divide.  */
5545 emit_hpdiv_const (rtx *operands, int unsignedp)
5547   if (GET_CODE (operands[2]) == CONST_INT
5548       && INTVAL (operands[2]) > 0
5549       && INTVAL (operands[2]) < 16
5550       && magic_milli[INTVAL (operands[2])])
/* Millicode return pointer: %r2 in the 64-bit runtime, %r31 otherwise.  */
5552       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
/* Dividend goes in %r26; quotient comes back in %r29.  */
5554       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5558 		gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5559 					    gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5561 							     gen_rtx_REG (SImode, 26),
5563 			   gen_rtx_CLOBBER (VOIDmode, operands[4]),
5564 			   gen_rtx_CLOBBER (VOIDmode, operands[3]),
5565 			   gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5566 			   gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5567 			   gen_rtx_CLOBBER (VOIDmode, ret))));
5568       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Output assembler for a divide: either a magic-constant millicode
   ($$divI_<n>/$$divU_<n>, imported lazily via div_milli[][]) or the
   generic $$divI/$$divU routine.  */
5575 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5579   /* If the divisor is a constant, try to use one of the special
5581   if (GET_CODE (operands[0]) == CONST_INT)
/* Static buffer: the returned template must outlive this call.  */
5583       static char buf[100];
5584       divisor = INTVAL (operands[0]);
5585       if (!div_milli[divisor][unsignedp])
/* First use of this divisor/signedness: emit the .IMPORT once.  */
5587 	  div_milli[divisor][unsignedp] = 1;
5589 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5591 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5595 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5596 		   INTVAL (operands[0]));
5597 	  return output_millicode_call (insn,
5598 					gen_rtx_SYMBOL_REF (SImode, buf));
5602 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5603 		   INTVAL (operands[0]));
5604 	  return output_millicode_call (insn,
5605 					gen_rtx_SYMBOL_REF (SImode, buf));
5608   /* Divisor isn't a special constant.  */
5613 	  import_milli (divU);
5614 	  return output_millicode_call (insn,
5615 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5619 	  import_milli (divI);
5620 	  return output_millicode_call (insn,
5621 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));

5626 /* Output a $$rem millicode to do mod.  */

/* Output assembler for a modulo via the $$remU/$$remI millicode.  */
5629 output_mod_insn (int unsignedp, rtx insn)
5633       import_milli (remU);
5634       return output_millicode_call (insn,
5635 				    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5639       import_milli (remI);
5640       return output_millicode_call (insn,
5641 				    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the SOM ".CALL ARGWn=..." argument-relocation descriptor for
   CALL_INSN, derived from the USEs in CALL_INSN_FUNCTION_USAGE:
   GR for general registers, FR/FU for floating-point halves.  */
5646 output_arg_descriptor (rtx call_insn)
5648   const char *arg_regs[4];
5649   enum machine_mode arg_mode;
5651   int i, output_flag = 0;
5654   /* We neither need nor want argument location descriptors for the
5655      64bit runtime environment or the ELF32 environment.  */
5656   if (TARGET_64BIT || TARGET_ELF32)
5659   for (i = 0; i < 4; i++)
5662   /* Specify explicitly that no argument relocations should take place
5663      if using the portable runtime calling conventions.  */
5664   if (TARGET_PORTABLE_RUNTIME)
5666       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5671   if (GET_CODE (call_insn) != CALL_INSN)
/* Walk the (use (reg ...)) list attached to the call.  */
5673   for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5675       rtx use = XEXP (link, 0);
5677       if (! (GET_CODE (use) == USE
5678 	     && GET_CODE (XEXP (use, 0)) == REG
5679 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5682       arg_mode = GET_MODE (XEXP (use, 0));
5683       regno = REGNO (XEXP (use, 0));
/* General argument registers %r23-%r26 map to ARGW3..ARGW0; a DImode
   argument occupies two consecutive words.  */
5684       if (regno >= 23 && regno <= 26)
5686 	  arg_regs[26 - regno] = "GR";
5687 	  if (arg_mode == DImode)
5688 	    arg_regs[25 - regno] = "GR";
/* Floating argument registers %fr4-%fr7 (regnos 32-39, two halves
   each); doubles take an FR/FU pair whose order is configurable.  */
5690       else if (regno >= 32 && regno <= 39)
5692 	  if (arg_mode == SFmode)
5693 	    arg_regs[(regno - 32) / 2] = "FR";
5696 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5697 	      arg_regs[(regno - 34) / 2] = "FR";
5698 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5700 	      arg_regs[(regno - 34) / 2] = "FU";
5701 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
/* Emit the assembled descriptor.  */
5706   fputs ("\t.CALL ", asm_out_file);
5707   for (i = 0; i < 4; i++)
5712 	    fputc (',', asm_out_file);
5713 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5716   fputc ('\n', asm_out_file);
5719 /* Return the class of any secondary reload register that is needed to
5720 move IN into a register in class CLASS using mode MODE.
5722 Profiling has showed this routine and its descendants account for
5723 a significant amount of compile time (~7%). So it has been
5724 optimized to reduce redundant computations and eliminate useless
5727 It might be worthwhile to try and make this a leaf function too. */
/* SECONDARY_RELOAD_CLASS worker (see comment above): returns the class
   of scratch register needed to move IN into CLASS in MODE, or
   (in elided lines) NO_REGS when none is required.  */
5730 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5732   int regno, is_symbolic;
5734   /* Trying to load a constant into a FP register during PIC code
5735      generation will require %r1 as a scratch register.  */
5737       && GET_MODE_CLASS (mode) == MODE_INT
5738       && FP_REG_CLASS_P (class)
5739       && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5742   /* Profiling showed the PA port spends about 1.3% of its compilation
5743      time in true_regnum from calls inside secondary_reload_class.  */
/* Resolve IN to a hard register number where cheaply possible;
   true_regnum is only called for pseudos and SUBREGs.  */
5745   if (GET_CODE (in) == REG)
5748       if (regno >= FIRST_PSEUDO_REGISTER)
5749 	regno = true_regnum (in);
5751   else if (GET_CODE (in) == SUBREG)
5752     regno = true_regnum (in);
5756   /* If we have something like (mem (mem (...)), we can safely assume the
5757      inner MEM will end up in a general register after reloading, so there's
5758      no need for a secondary reload.  */
5759   if (GET_CODE (in) == MEM
5760       && GET_CODE (XEXP (in, 0)) == MEM)
5763   /* Handle out of range displacement for integer mode loads/stores of
5765   if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5766        && GET_MODE_CLASS (mode) == MODE_INT
5767        && FP_REG_CLASS_P (class))
5768       || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5769     return GENERAL_REGS;
5771   /* A SAR<->FP register copy requires a secondary register (GPR) as
5772      well as secondary memory.  */
5773   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5774       && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5775 	  || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5776     return GENERAL_REGS;
5778   if (GET_CODE (in) == HIGH)
5781   /* Profiling has showed GCC spends about 2.6% of its compilation
5782      time in symbolic_operand from calls inside secondary_reload_class.
5784      We use an inline copy and only compute its return value once to avoid
/* Inline symbolic_operand: SYMBOL_REF/LABEL_REF, or CONST of one of
   those plus an integer (case labels elided in this listing).  */
5786   switch (GET_CODE (in))
5796       is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5797 		      || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5798 		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5808       && read_only_operand (in, VOIDmode))
/* Symbolic operands need %r1 (R1_REGS) as scratch.  */
5811   if (class != R1_REGS && is_symbolic)
/* FUNCTION_ARG_PADDING worker: chooses upward (64-bit runtime
   aggregates) or downward (32-bit small arguments) padding; the
   return statements themselves are elided from this listing.  */
5818 function_arg_padding (enum machine_mode mode, tree type)
5821       || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5823       /* Return none if justification is not required.  */
5825 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5826 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5829       /* The directions set here are ignored when a BLKmode argument larger
5830 	 than a word is placed in a register.  Different code is used for
5831 	 the stack and registers.  This makes it difficult to have a
5832 	 consistent data representation for both the stack and registers.
5833 	 For both runtimes, the justification and padding for arguments on
5834 	 the stack and in registers should be identical.  */
5836 	/* The 64-bit runtime specifies left justification for aggregates.  */
5839 	/* The 32-bit runtime architecture specifies right justification.
5840 	   When the argument is passed on the stack, the argument is padded
5841 	   with garbage on the left.  The HP compiler pads with zeros.  */
/* Scalars smaller than a parameter slot are right-justified.  */
5845   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5852 /* Do what is necessary for `va_start'. We look at the current function
5853 to determine if stdargs or varargs is used and fill in an initial
5854 va_list. A pointer to this constructor is returned. */
/* EXPAND_BUILTIN_SAVEREGS worker (see comment above): flushes the
   argument registers to their stack home and returns a pointer to the
   first anonymous argument.  The two arms below handle the 64-bit and
   32-bit runtimes; the branch between them is elided in this listing.  */
5857 hppa_builtin_saveregs (void)
5860   tree fntype = TREE_TYPE (current_function_decl);
/* One extra word of adjustment for true varargs (no named args, or the
   last named arg is not followed by '...' terminated with void).  */
5861   int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5862 		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5863 		       != void_type_node)))
5864 		? UNITS_PER_WORD : 0);
5867     offset = plus_constant (current_function_arg_offset_rtx, argadj);
5869     offset = current_function_arg_offset_rtx;
5875       /* Adjust for varargs/stdarg differences.  */
5877 	offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5879 	offset = current_function_arg_offset_rtx;
5881       /* We need to save %r26 .. %r19 inclusive starting at offset -64
5882 	 from the incoming arg pointer and growing to larger addresses.  */
5883       for (i = 26, off = -64; i >= 19; i--, off += 8)
5884 	emit_move_insn (gen_rtx_MEM (word_mode,
5885 				     plus_constant (arg_pointer_rtx, off)),
5886 			gen_rtx_REG (word_mode, i));
5888       /* The incoming args pointer points just beyond the flushback area;
5889 	 normally this is not a serious concern.  However, when we are doing
5890 	 varargs/stdargs we want to make the arg pointer point to the start
5891 	 of the incoming argument area.  */
5892       emit_move_insn (virtual_incoming_args_rtx,
5893 		      plus_constant (arg_pointer_rtx, -64));
5895       /* Now return a pointer to the first anonymous argument.  */
5896       return copy_to_reg (expand_binop (Pmode, add_optab,
5897 					virtual_incoming_args_rtx,
5898 					offset, 0, 0, OPTAB_LIB_WIDEN));
5901   /* Store general registers on the stack.  */
5902   dest = gen_rtx_MEM (BLKmode,
5903 		      plus_constant (current_function_internal_arg_pointer,
5905   set_mem_alias_set (dest, get_varargs_alias_set ());
5906   set_mem_align (dest, BITS_PER_WORD);
/* 32-bit runtime: store %r23-%r26 (four registers) to the block.  */
5907   move_block_from_reg (23, dest, 4);
5909   /* move_block_from_reg will emit code to store the argument registers
5910      individually as scalar stores.
5912      However, other insns may later load from the same addresses for
5913      a structure load (passing a struct to a varargs routine).
5915      The alias code assumes that such aliasing can never happen, so we
5916      have to keep memory referencing insns from moving up beyond the
5917      last argument register store.  So we emit a blockage insn here.  */
5918   emit_insn (gen_blockage ());
5920   return copy_to_reg (expand_binop (Pmode, add_optab,
5921 				    current_function_internal_arg_pointer,
5922 				    offset, 0, 0, OPTAB_LIB_WIDEN));
/* TARGET_EXPAND_BUILTIN_VA_START: save the argument registers, then
   initialize VALIST with the pointer returned by the saveregs code.  */
5926 hppa_va_start (tree valist, rtx nextarg)
5928   nextarg = expand_builtin_saveregs ();
5929   std_expand_builtin_va_start (valist, nextarg);

/* va_arg expander.  Builds a tree expression that advances VALIST and
   yields the address of the next argument of TYPE, then expands it.
   The TARGET_64BIT branch header and several intermediate statements
   are elided from this listing.  */
5933 hppa_va_arg (tree valist, tree type)
5935   HOST_WIDE_INT size = int_size_in_bytes (type);
5941       /* Every argument in PA64 is supposed to be passed by value
5942 	 (including large structs).  However, as a GCC extension, we
5943 	 pass zero and variable sized arguments by reference.  Empty
5944 	 structures are a GCC extension not supported by the HP
5945 	 compilers.  Thus, passing them by reference isn't likely
5946 	 to conflict with the ABI.  For variable sized arguments,
5947 	 GCC doesn't have the infrastructure to allocate these to
5950       /* Arguments with a size greater than 8 must be aligned 0 MOD 16.  */
5952       if (size > UNITS_PER_WORD)
/* Round valist up to a 16-byte boundary: (valist + 15) & -16 for the
   two-word case.  */
5954 	  t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5955 		     build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5956 	  t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5957 		     build_int_2 (-2 * UNITS_PER_WORD, -1));
5958 	  t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5959 	  TREE_SIDE_EFFECTS (t) = 1;
5960 	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* In-range by-value case: defer to the generic expander.  */
5964 	return std_expand_builtin_va_arg (valist, type);
/* By-reference case (PA64 zero/variable-sized): fetch the pointer the
   caller stored, post-incrementing valist.  */
5967 	  ptr = build_pointer_type (type);
5969 	  /* Args grow upward.  */
5970 	  t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5971 		     build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5972 	  TREE_SIDE_EFFECTS (t) = 1;
5974 	  pptr = build_pointer_type (ptr);
5975 	  t = build1 (NOP_EXPR, pptr, t);
5976 	  TREE_SIDE_EFFECTS (t) = 1;
5978 	  t = build1 (INDIRECT_REF, ptr, t);
5979 	  TREE_SIDE_EFFECTS (t) = 1;
5982   else /* !TARGET_64BIT */
5984       ptr = build_pointer_type (type);
5986       /* "Large" and variable sized types are passed by reference.  */
5987       if (size > 8 || size <= 0)
5989 	  /* Args grow downward.  */
5990 	  t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5991 		     build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5992 	  TREE_SIDE_EFFECTS (t) = 1;
5994 	  pptr = build_pointer_type (ptr);
5995 	  t = build1 (NOP_EXPR, pptr, t);
5996 	  TREE_SIDE_EFFECTS (t) = 1;
5998 	  t = build1 (INDIRECT_REF, ptr, t);
5999 	  TREE_SIDE_EFFECTS (t) = 1;
/* Small by-value case: step valist down by size, align, and adjust
   for right-justification of sub-word arguments.  */
6003 	  t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
6004 		     build_int_2 (-size, -1));
6006 	  /* Copied from va-pa.h, but we probably don't need to align to
6007 	     word size, since we generate and preserve that invariant.  */
6008 	  t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
6009 		     build_int_2 ((size > 4 ? -8 : -4), -1));
6011 	  t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6012 	  TREE_SIDE_EFFECTS (t) = 1;
/* Sub-word arguments are right-justified within their slot.  */
6014 	  ofs = (8 - size) % 4;
6017 	      t = build (PLUS_EXPR, TREE_TYPE (valist), t,
6018 			 build_int_2 (ofs, 0));
6019 	      TREE_SIDE_EFFECTS (t) = 1;
6022       t = build1 (NOP_EXPR, ptr, t);
6023       TREE_SIDE_EFFECTS (t) = 1;
6028   return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6033 /* This routine handles all the normal conditional branch sequences we
6034 might need to generate. It handles compare immediate vs compare
6035 register, nullification of delay slots, varying length branches,
6036 negated branches, and all combinations of the above. It returns the
6037 output appropriate to emit the branch corresponding to all given
6041 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
/* BUF is static so the template pointer remains valid after return;
   it is filled in below via strcpy/strcat and handed to final.  */
6043 static char buf[100];
6047 /* A conditional branch to the following instruction (eg the delay slot)
6048 is asking for a disaster. This can happen when not optimizing and
6049 when jump optimization fails.
6051 While it is usually safe to emit nothing, this can fail if the
6052 preceding instruction is a nullified branch with an empty delay
6053 slot and the same branch target as this branch. We could check
6054 for this but jump optimization should eliminate nop jumps. It
6055 is always safe to emit a nop. */
6056 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6059 /* The doubleword form of the cmpib instruction doesn't have the LEU
6060 and GTU conditions while the cmpb instruction does. Since we accept
6061 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6062 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6063 operands[2] = gen_rtx_REG (DImode, 0)
6065 /* If this is a long branch with its delay slot unfilled, set `nullify'
6066 as it can nullify the delay slot and save a nop. */
6067 if (length == 8 && dbr_sequence_length () == 0)
6070 /* If this is a short forward conditional branch which did not get
6071 its delay slot filled, the delay slot can still be nullified. */
6072 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6073 nullify = forward_branch_p (insn);
6075 /* A forward branch over a single nullified insn can be done with a
6076 comclr instruction. This avoids a single cycle penalty due to
6077 mis-predicted branch if we fall through (branch not taken). */
6079 && next_real_insn (insn) != 0
6080 && get_attr_length (next_real_insn (insn)) == 4
6081 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6087 /* All short conditional branches except backwards with an unfilled
6091 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6093 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6094 if (GET_MODE (operands[1]) == DImode)
6097 strcat (buf, "%B3");
6099 strcat (buf, "%S3");
6101 strcat (buf, " %2,%r1,%%r0");
6103 strcat (buf, ",n %2,%r1,%0");
6105 strcat (buf, " %2,%r1,%0");
6108 /* All long conditionals. Note a short backward branch with an
6109 unfilled delay slot is treated just like a long backward branch
6110 with an unfilled delay slot. */
6112 /* Handle weird backwards branch with a filled delay slot
6113 with is nullified. */
6114 if (dbr_sequence_length () != 0
6115 && ! forward_branch_p (insn)
6118 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6119 if (GET_MODE (operands[1]) == DImode)
6122 strcat (buf, "%S3");
6124 strcat (buf, "%B3");
6125 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6127 /* Handle short backwards branch with an unfilled delay slot.
6128 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6129 taken and untaken branches. */
6130 else if (dbr_sequence_length () == 0
6131 && ! forward_branch_p (insn)
6132 && INSN_ADDRESSES_SET_P ()
6133 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6134 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6136 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6137 if (GET_MODE (operands[1]) == DImode)
6140 strcat (buf, "%B3 %2,%r1,%0%#");
6142 strcat (buf, "%S3 %2,%r1,%0%#");
6146 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6147 if (GET_MODE (operands[1]) == DImode)
6150 strcat (buf, "%S3");
6152 strcat (buf, "%B3");
6154 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6156 strcat (buf, " %2,%r1,%%r0\n\tb %0");
/* Out-of-range case: build a reversed conditional branch that skips
   over the very long branch emitted by output_lbranch at the end.  */
6162 xoperands[0] = operands[0];
6163 xoperands[1] = operands[1];
6164 xoperands[2] = operands[2];
6165 xoperands[3] = operands[3];
6167 /* The reversed conditional branch must branch over one additional
6168 instruction if the delay slot is filled. If the delay slot
6169 is empty, the instruction after the reversed condition branch
6170 must be nullified. */
6171 nullify = dbr_sequence_length () == 0;
6172 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6174 /* Create a reversed conditional branch which branches around
6175 the following insns. */
6176 if (GET_MODE (operands[1]) != DImode)
6182 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6185 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6191 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6194 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6203 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6206 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6212 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6215 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6219 output_asm_insn (buf, xoperands);
6220 return output_lbranch (operands[0], insn);
6228 /* This routine handles long unconditional branches that exceed the
6229 maximum range of a simple branch instruction. */
6232 output_lbranch (rtx dest, rtx insn)
/* DEST is the branch target; INSN is the jump being output.  %r1 is
   needed as a scratch register, so it is spilled to a frame-marker
   slot first and reloaded in the delay slot of the final branch.  */
6236 xoperands[0] = dest;
6238 /* First, free up the delay slot. */
6239 if (dbr_sequence_length () != 0)
6241 /* We can't handle a jump in the delay slot. */
6242 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
6245 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6246 optimize, 0, 0, NULL);
6248 /* Now delete the delay insn. */
6249 PUT_CODE (NEXT_INSN (insn), NOTE);
6250 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6251 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6254 /* Output an insn to save %r1. The runtime documentation doesn't
6255 specify whether the "Clean Up" slot in the callers frame can
6256 be clobbered by the callee. It isn't copied by HP's builtin
6257 alloca, so this suggests that it can be clobbered if necessary.
6258 The "Static Link" location is copied by HP builtin alloca, so
6259 we avoid using it. Using the cleanup slot might be a problem
6260 if we have to interoperate with languages that pass cleanup
6261 information. However, it should be possible to handle these
6262 situations with GCC's asm feature.
6264 The "Current RP" slot is reserved for the called procedure, so
6265 we try to use it when we don't have a frame of our own. It's
6266 rather unlikely that we won't have a frame when we need to emit
6269 Really the way to go long term is a register scavenger; goto
6270 the target of the jump and find a register which we can use
6271 as a scratch to hold the value in %r1. Then, we wouldn't have
6272 to free up the delay slot or clobber a slot that may be needed
6273 for other purposes. */
/* NOTE(review): the std/ldd (64-bit) vs stw/ldw (32-bit) pairs below
   are presumably selected by a TARGET_64BIT test on elided lines —
   confirm against the full source.  */
6276 if (actual_fsize == 0 && !regs_ever_live[2])
6277 /* Use the return pointer slot in the frame marker. */
6278 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6280 /* Use the slot at -40 in the frame marker since HP builtin
6281 alloca doesn't copy it. */
6282 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6286 if (actual_fsize == 0 && !regs_ever_live[2])
6287 /* Use the return pointer slot in the frame marker. */
6288 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6290 /* Use the "Clean Up" slot in the frame marker. In GCC,
6291 the only other use of this location is for copying a
6292 floating point double argument from a floating-point
6293 register to two general registers. The copy is done
6294 as an "atomic" operation when outputting a call, so it
6295 won't interfere with our using the location here. */
6296 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6299 if (TARGET_PORTABLE_RUNTIME)
6301 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6302 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6303 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6307 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6308 if (TARGET_SOM || !TARGET_GAS)
6310 xoperands[1] = gen_label_rtx ();
6311 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6312 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6313 CODE_LABEL_NUMBER (xoperands[1]));
6314 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6318 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6319 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6321 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6324 /* Now output a very long branch to the original target. */
6325 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6327 /* Now restore the value of %r1 in the delay slot. */
6330 if (actual_fsize == 0 && !regs_ever_live[2])
6331 return "ldd -16(%%r30),%%r1";
6333 return "ldd -40(%%r30),%%r1";
6337 if (actual_fsize == 0 && !regs_ever_live[2])
6338 return "ldw -20(%%r30),%%r1";
6340 return "ldw -12(%%r30),%%r1";
6344 /* This routine handles all the branch-on-bit conditional branch sequences we
6345 might need to generate. It handles nullification of delay slots,
6346 varying length branches, negated branches and all combinations of the
6347 above. it returns the appropriate output template to emit the branch. */
6350 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6351 int negated, rtx insn, int which)
/* WHICH, combined with NEGATED, selects whether operands[2] or
   operands[3] supplies the branch target in the templates below —
   NOTE(review): confirm exact semantics against the bb patterns
   in the machine description.  */
6353 static char buf[100];
6356 /* A conditional branch to the following instruction (eg the delay slot) is
6357 asking for a disaster. I do not think this can happen as this pattern
6358 is only used when optimizing; jump optimization should eliminate the
6359 jump. But be prepared just in case. */
6361 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6364 /* If this is a long branch with its delay slot unfilled, set `nullify'
6365 as it can nullify the delay slot and save a nop. */
6366 if (length == 8 && dbr_sequence_length () == 0)
6369 /* If this is a short forward conditional branch which did not get
6370 its delay slot filled, the delay slot can still be nullified. */
6371 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6372 nullify = forward_branch_p (insn);
6374 /* A forward branch over a single nullified insn can be done with a
6375 extrs instruction. This avoids a single cycle penalty due to
6376 mis-predicted branch if we fall through (branch not taken). */
6379 && next_real_insn (insn) != 0
6380 && get_attr_length (next_real_insn (insn)) == 4
6381 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6388 /* All short conditional branches except backwards with an unfilled
6392 strcpy (buf, "{extrs,|extrw,s,}");
6394 strcpy (buf, "bb,");
6395 if (useskip && GET_MODE (operands[0]) == DImode)
6396 strcpy (buf, "extrd,s,*");
6397 else if (GET_MODE (operands[0]) == DImode)
6398 strcpy (buf, "bb,*");
6399 if ((which == 0 && negated)
6400 || (which == 1 && ! negated))
6405 strcat (buf, " %0,%1,1,%%r0");
6406 else if (nullify && negated)
6407 strcat (buf, ",n %0,%1,%3");
6408 else if (nullify && ! negated)
6409 strcat (buf, ",n %0,%1,%2");
6410 else if (! nullify && negated)
6411 strcat (buf, "%0,%1,%3");
6412 else if (! nullify && ! negated)
6413 strcat (buf, " %0,%1,%2");
6416 /* All long conditionals. Note a short backward branch with an
6417 unfilled delay slot is treated just like a long backward branch
6418 with an unfilled delay slot. */
6420 /* Handle weird backwards branch with a filled delay slot
6421 with is nullified. */
6422 if (dbr_sequence_length () != 0
6423 && ! forward_branch_p (insn)
6426 strcpy (buf, "bb,");
6427 if (GET_MODE (operands[0]) == DImode)
6429 if ((which == 0 && negated)
6430 || (which == 1 && ! negated))
6435 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6437 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6439 /* Handle short backwards branch with an unfilled delay slot.
6440 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6441 taken and untaken branches. */
6442 else if (dbr_sequence_length () == 0
6443 && ! forward_branch_p (insn)
6444 && INSN_ADDRESSES_SET_P ()
6445 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6446 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6448 strcpy (buf, "bb,");
6449 if (GET_MODE (operands[0]) == DImode)
6451 if ((which == 0 && negated)
6452 || (which == 1 && ! negated))
6457 strcat (buf, " %0,%1,%3%#");
6459 strcat (buf, " %0,%1,%2%#");
6463 strcpy (buf, "{extrs,|extrw,s,}");
6464 if (GET_MODE (operands[0]) == DImode)
6465 strcpy (buf, "extrd,s,*");
6466 if ((which == 0 && negated)
6467 || (which == 1 && ! negated))
6471 if (nullify && negated)
6472 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6473 else if (nullify && ! negated)
6474 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6476 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6478 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6488 /* This routine handles all the branch-on-variable-bit conditional branch
6489 sequences we might need to generate. It handles nullification of delay
6490 slots, varying length branches, negated branches and all combinations
6491 of the above. it returns the appropriate output template to emit the
6495 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6496 int negated, rtx insn, int which)
/* Same structure as output_bb, but the bit position comes from the
   SAR register (bvb/%%sar forms) instead of a constant operand.  */
6498 static char buf[100];
6501 /* A conditional branch to the following instruction (eg the delay slot) is
6502 asking for a disaster. I do not think this can happen as this pattern
6503 is only used when optimizing; jump optimization should eliminate the
6504 jump. But be prepared just in case. */
6506 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6509 /* If this is a long branch with its delay slot unfilled, set `nullify'
6510 as it can nullify the delay slot and save a nop. */
6511 if (length == 8 && dbr_sequence_length () == 0)
6514 /* If this is a short forward conditional branch which did not get
6515 its delay slot filled, the delay slot can still be nullified. */
6516 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6517 nullify = forward_branch_p (insn);
6519 /* A forward branch over a single nullified insn can be done with a
6520 extrs instruction. This avoids a single cycle penalty due to
6521 mis-predicted branch if we fall through (branch not taken). */
6524 && next_real_insn (insn) != 0
6525 && get_attr_length (next_real_insn (insn)) == 4
6526 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6533 /* All short conditional branches except backwards with an unfilled
6537 strcpy (buf, "{vextrs,|extrw,s,}");
6539 strcpy (buf, "{bvb,|bb,}");
6540 if (useskip && GET_MODE (operands[0]) == DImode)
6541 strcpy (buf, "extrd,s,*");
6542 else if (GET_MODE (operands[0]) == DImode)
6543 strcpy (buf, "bb,*");
6544 if ((which == 0 && negated)
6545 || (which == 1 && ! negated))
6550 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6551 else if (nullify && negated)
6552 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6553 else if (nullify && ! negated)
6554 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6555 else if (! nullify && negated)
6556 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6557 else if (! nullify && ! negated)
6558 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6561 /* All long conditionals. Note a short backward branch with an
6562 unfilled delay slot is treated just like a long backward branch
6563 with an unfilled delay slot. */
6565 /* Handle weird backwards branch with a filled delay slot
6566 with is nullified. */
6567 if (dbr_sequence_length () != 0
6568 && ! forward_branch_p (insn)
6571 strcpy (buf, "{bvb,|bb,}");
6572 if (GET_MODE (operands[0]) == DImode)
6574 if ((which == 0 && negated)
6575 || (which == 1 && ! negated))
6580 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6582 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6584 /* Handle short backwards branch with an unfilled delay slot.
6585 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6586 taken and untaken branches. */
6587 else if (dbr_sequence_length () == 0
6588 && ! forward_branch_p (insn)
6589 && INSN_ADDRESSES_SET_P ()
6590 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6591 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6593 strcpy (buf, "{bvb,|bb,}");
6594 if (GET_MODE (operands[0]) == DImode)
6596 if ((which == 0 && negated)
6597 || (which == 1 && ! negated))
6602 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6604 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6608 strcpy (buf, "{vextrs,|extrw,s,}");
6609 if (GET_MODE (operands[0]) == DImode)
6610 strcpy (buf, "extrd,s,*");
6611 if ((which == 0 && negated)
6612 || (which == 1 && ! negated))
6616 if (nullify && negated)
6617 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6618 else if (nullify && ! negated)
6619 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6621 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6623 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6633 /* Return the output template for emitting a dbra type insn.
6635 Note it may perform some output operations on its own before
6636 returning the final output string. */
6638 output_dbra (rtx *operands, rtx insn, int which_alternative)
/* WHICH_ALTERNATIVE: 0 = counter in a GR (addib), 1 = counter in an
   FP register (bounced through the -16(%r30) stack slot), otherwise
   counter in memory (reloaded into a GR scratch).  */
6641 /* A conditional branch to the following instruction (eg the delay slot) is
6642 asking for a disaster. Be prepared! */
6644 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6646 if (which_alternative == 0)
6647 return "ldo %1(%0),%0";
6648 else if (which_alternative == 1)
6650 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6651 output_asm_insn ("ldw -16(%%r30),%4", operands);
6652 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6653 return "{fldws|fldw} -16(%%r30),%0";
6657 output_asm_insn ("ldw %0,%4", operands);
6658 return "ldo %1(%4),%4\n\tstw %4,%0";
6662 if (which_alternative == 0)
6664 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6665 int length = get_attr_length (insn);
6667 /* If this is a long branch with its delay slot unfilled, set `nullify'
6668 as it can nullify the delay slot and save a nop. */
6669 if (length == 8 && dbr_sequence_length () == 0)
6672 /* If this is a short forward conditional branch which did not get
6673 its delay slot filled, the delay slot can still be nullified. */
6674 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6675 nullify = forward_branch_p (insn);
6677 /* Handle short versions first. */
6678 if (length == 4 && nullify)
6679 return "addib,%C2,n %1,%0,%3";
6680 else if (length == 4 && ! nullify)
6681 return "addib,%C2 %1,%0,%3";
6682 else if (length == 8)
6684 /* Handle weird backwards branch with a fulled delay slot
6685 which is nullified. */
6686 if (dbr_sequence_length () != 0
6687 && ! forward_branch_p (insn)
6689 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6690 /* Handle short backwards branch with an unfilled delay slot.
6691 Using a addb;nop rather than addi;bl saves 1 cycle for both
6692 taken and untaken branches. */
6693 else if (dbr_sequence_length () == 0
6694 && ! forward_branch_p (insn)
6695 && INSN_ADDRESSES_SET_P ()
6696 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6697 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6698 return "addib,%C2 %1,%0,%3%#";
6700 /* Handle normal cases. */
6702 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6704 return "addi,%N2 %1,%0,%0\n\tb %3";
6709 /* Deal with gross reload from FP register case. */
6710 else if (which_alternative == 1)
6712 /* Move loop counter from FP register to MEM then into a GR,
6713 increment the GR, store the GR into MEM, and finally reload
6714 the FP register from MEM from within the branch's delay slot. */
6715 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6717 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6718 if (get_attr_length (insn) == 24)
6719 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6721 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6723 /* Deal with gross reload from memory case. */
6726 /* Reload loop counter from memory, the store back to memory
6727 happens in the branch's delay slot. */
6728 output_asm_insn ("ldw %0,%4", operands);
6729 if (get_attr_length (insn) == 12)
6730 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6732 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6736 /* Return the output template for emitting a dbra type insn.
6738 Note it may perform some output operations on its own before
6739 returning the final output string. */
6741 output_movb (rtx *operands, rtx insn, int which_alternative,
6742 int reverse_comparison)
/* Move-and-branch: alternatives are 0 = GR destination (movb),
   1 = FP destination (via the -16(%r30) slot), 2 = memory
   destination, otherwise SAR.  REVERSE_COMPARISON flips the
   condition code in operands[2] in place before output.  */
6745 /* A conditional branch to the following instruction (eg the delay slot) is
6746 asking for a disaster. Be prepared! */
6748 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6750 if (which_alternative == 0)
6751 return "copy %1,%0";
6752 else if (which_alternative == 1)
6754 output_asm_insn ("stw %1,-16(%%r30)", operands);
6755 return "{fldws|fldw} -16(%%r30),%0";
6757 else if (which_alternative == 2)
6763 /* Support the second variant. */
6764 if (reverse_comparison)
6765 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6767 if (which_alternative == 0)
6769 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6770 int length = get_attr_length (insn);
6772 /* If this is a long branch with its delay slot unfilled, set `nullify'
6773 as it can nullify the delay slot and save a nop. */
6774 if (length == 8 && dbr_sequence_length () == 0)
6777 /* If this is a short forward conditional branch which did not get
6778 its delay slot filled, the delay slot can still be nullified. */
6779 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6780 nullify = forward_branch_p (insn);
6782 /* Handle short versions first. */
6783 if (length == 4 && nullify)
6784 return "movb,%C2,n %1,%0,%3";
6785 else if (length == 4 && ! nullify)
6786 return "movb,%C2 %1,%0,%3";
6787 else if (length == 8)
6789 /* Handle weird backwards branch with a filled delay slot
6790 which is nullified. */
6791 if (dbr_sequence_length () != 0
6792 && ! forward_branch_p (insn)
6794 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6796 /* Handle short backwards branch with an unfilled delay slot.
6797 Using a movb;nop rather than or;bl saves 1 cycle for both
6798 taken and untaken branches. */
6799 else if (dbr_sequence_length () == 0
6800 && ! forward_branch_p (insn)
6801 && INSN_ADDRESSES_SET_P ()
6802 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6803 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6804 return "movb,%C2 %1,%0,%3%#";
6805 /* Handle normal cases. */
6807 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6809 return "or,%N2 %1,%%r0,%0\n\tb %3";
6814 /* Deal with gross reload from FP register case. */
6815 else if (which_alternative == 1)
6817 /* Move loop counter from FP register to MEM then into a GR,
6818 increment the GR, store the GR into MEM, and finally reload
6819 the FP register from MEM from within the branch's delay slot. */
6820 output_asm_insn ("stw %1,-16(%%r30)", operands);
6821 if (get_attr_length (insn) == 12)
6822 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6824 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6826 /* Deal with gross reload from memory case. */
6827 else if (which_alternative == 2)
6829 /* Reload loop counter from memory, the store back to memory
6830 happens in the branch's delay slot. */
6831 if (get_attr_length (insn) == 8)
6832 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6834 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6836 /* Handle SAR as a destination. */
6839 if (get_attr_length (insn) == 8)
6840 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6842 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6846 /* Copy any FP arguments in INSN into integer registers. */
6848 copy_fp_args (rtx insn)
/* Walk the CALL_INSN_FUNCTION_USAGE list of the call INSN; for every
   argument that lives in an FP register (regnos 32-39), bounce it
   through the stack slot at -16(%sr0,%r30) into the corresponding
   integer argument register(s).  */
6853 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6855 int arg_mode, regno;
6856 rtx use = XEXP (link, 0);
6858 if (! (GET_CODE (use) == USE
6859 && GET_CODE (XEXP (use, 0)) == REG
6860 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6863 arg_mode = GET_MODE (XEXP (use, 0));
6864 regno = REGNO (XEXP (use, 0));
6866 /* Is it a floating point register? */
6867 if (regno >= 32 && regno <= 39)
6869 /* Copy the FP register into an integer register via memory. */
6870 if (arg_mode == SFmode)
6872 xoperands[0] = XEXP (use, 0);
6873 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6874 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6875 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* Double: store 8 bytes, then reload as two 4-byte words into a
   GR pair (%R1 is the low half of the DImode register pair).  */
6879 xoperands[0] = XEXP (use, 0);
6880 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6881 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6882 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6883 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6889 /* Compute length of the FP argument copy sequence for INSN. */
6891 length_fp_args (rtx insn)
/* Mirrors copy_fp_args: must return exactly the number of bytes that
   copy_fp_args will emit, so keep the two functions in sync.  */
6896 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6898 int arg_mode, regno;
6899 rtx use = XEXP (link, 0);
6901 if (! (GET_CODE (use) == USE
6902 && GET_CODE (XEXP (use, 0)) == REG
6903 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6906 arg_mode = GET_MODE (XEXP (use, 0));
6907 regno = REGNO (XEXP (use, 0));
6909 /* Is it a floating point register? */
6910 if (regno >= 32 && regno <= 39)
6912 if (arg_mode == SFmode)
6922 /* Return the attribute length for the millicode call instruction INSN.
6923 The length must match the code generated by output_millicode_call.
6924 We include the delay slot in the returned length as it is better to
6925 over estimate the length than to under estimate it. */
6928 attr_length_millicode_call (rtx insn)
6930 unsigned long distance = -1;
6931 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6933 if (INSN_ADDRESSES_SET_P ())
/* Conservative worst-case distance: current code size plus the
   reference address of INSN; the wraparound check below guards
   against unsigned overflow.  */
6935 distance = (total + insn_current_reference_address (insn));
6936 if (distance < total)
6942 if (!TARGET_LONG_CALLS && distance < 7600000)
6947 else if (TARGET_PORTABLE_RUNTIME)
6951 if (!TARGET_LONG_CALLS && distance < 240000)
6954 if (TARGET_LONG_ABS_CALL && !flag_pic)
6961 /* INSN is a function call. It may have an unconditional jump
6964 CALL_DEST is the routine we are calling. */
6967 output_millicode_call (rtx insn, rtx call_dest)
/* Millicode calls return through %r31 (or %r2 on 64-bit); see the
   xoperands[2] setup below.  The sequence chosen must agree byte for
   byte with the length computed by attr_length_millicode_call.  */
6969 int attr_length = get_attr_length (insn);
6970 int seq_length = dbr_sequence_length ();
6975 xoperands[0] = call_dest;
6976 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6978 /* Handle the common case where we are sure that the branch will
6979 reach the beginning of the $CODE$ subspace. The within reach
6980 form of the $$sh_func_adrs call has a length of 28. Because
6981 it has an attribute type of multi, it never has a nonzero
6982 sequence length. The length of the $$sh_func_adrs is the same
6983 as certain out of reach PIC calls to other routines. */
6984 if (!TARGET_LONG_CALLS
6985 && ((seq_length == 0
6986 && (attr_length == 12
6987 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6988 || (seq_length != 0 && attr_length == 8)))
6990 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6996 /* It might seem that one insn could be saved by accessing
6997 the millicode function using the linkage table. However,
6998 this doesn't work in shared libraries and other dynamically
6999 loaded objects. Using a pc-relative sequence also avoids
7000 problems related to the implicit use of the gp register. */
7001 output_asm_insn ("b,l .+8,%%r1", xoperands);
7005 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7006 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7010 xoperands[1] = gen_label_rtx ();
7011 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7012 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7013 CODE_LABEL_NUMBER (xoperands[1]));
7014 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7017 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7019 else if (TARGET_PORTABLE_RUNTIME)
7021 /* Pure portable runtime doesn't allow be/ble; we also don't
7022 have PIC support in the assembler/linker, so this sequence
7025 /* Get the address of our target into %r1. */
7026 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7027 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7029 /* Get our return address into %r31. */
7030 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7031 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7033 /* Jump to our target address in %r1. */
7034 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7038 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7040 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7042 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7046 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7047 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7049 if (TARGET_SOM || !TARGET_GAS)
7051 /* The HP assembler can generate relocations for the
7052 difference of two symbols. GAS can do this for a
7053 millicode symbol but not an arbitrary external
7054 symbol when generating SOM output. */
7055 xoperands[1] = gen_label_rtx ();
7056 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7057 CODE_LABEL_NUMBER (xoperands[1]));
7058 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7059 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7063 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7064 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7068 /* Jump to our target address in %r1. */
7069 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7073 if (seq_length == 0)
7074 output_asm_insn ("nop", xoperands);
7076 /* We are done if there isn't a jump in the delay slot. */
7077 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7080 /* This call has an unconditional jump in its delay slot. */
7081 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7083 /* See if the return address can be adjusted. Use the containing
7084 sequence insn's address. */
7085 if (INSN_ADDRESSES_SET_P ())
7087 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7088 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7089 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7091 if (VAL_14_BITS_P (distance))
7093 xoperands[1] = gen_label_rtx ();
7094 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7095 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7096 CODE_LABEL_NUMBER (xoperands[1]));
7099 /* ??? This branch may not reach its target. */
7100 output_asm_insn ("nop\n\tb,n %0", xoperands);
7103 /* ??? This branch may not reach its target. */
7104 output_asm_insn ("nop\n\tb,n %0", xoperands);
7106 /* Delete the jump. */
7107 PUT_CODE (NEXT_INSN (insn), NOTE);
7108 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7109 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7114 /* Return the attribute length of the call instruction INSN. The SIBCALL
7115 flag indicates whether INSN is a regular call or a sibling call. The
7116 length returned must be longer than the code actually generated by
7117 output_call. Since branch shortening is done before delay branch
7118 sequencing, there is no way to determine whether or not the delay
7119 slot will be filled during branch shortening. Even when the delay
7120 slot is filled, we may have to add a nop if the delay slot contains
7121 a branch that can't reach its target. Thus, we always have to include
7122 the delay slot in the length estimate. This used to be done in
7123 pa_adjust_insn_length but we do it here now as some sequences always
7124 fill the delay slot and we can save four bytes in the estimate for
7128 attr_length_call (rtx insn, int sibcall)
7134 rtx pat = PATTERN (insn);
7135 unsigned long distance = -1;
7137 if (INSN_ADDRESSES_SET_P ())
7139 unsigned long total;
7141 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7142 distance = (total + insn_current_reference_address (insn));
7143 if (distance < total)
7147 /* Determine if this is a local call. */
/* The call address lives at a different position in the pattern for
   a plain call vs a call with a value (SET wrapping the CALL).  */
7148 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7149 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7151 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7153 call_decl = SYMBOL_REF_DECL (call_dest);
7154 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7156 /* pc-relative branch. */
7157 if (!TARGET_LONG_CALLS
7158 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7159 || distance < 240000))
7162 /* 64-bit plabel sequence. */
7163 else if (TARGET_64BIT && !local_call)
7164 length += sibcall ? 28 : 24;
7166 /* non-pic long absolute branch sequence. */
7167 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7170 /* long pc-relative branch sequence. */
7171 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7172 || (TARGET_64BIT && !TARGET_GAS)
7173 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7177 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7181 /* 32-bit plabel sequence. */
7187 length += length_fp_args (insn);
7197 if (!TARGET_NO_SPACE_REGS)
7205 /* INSN is a function call. It may have an unconditional jump
7208 CALL_DEST is the routine we are calling. */
/* Emit the assembly for a direct call to CALL_DEST from INSN; SIBCALL is
   nonzero for a sibling call.  Chooses among short pc-relative, 64-bit
   plabel, long absolute, long pc-relative and 32-bit plabel sequences,
   mirroring the length computation in attr_length_call.  May consume or
   delete the insn in the call's delay slot.
   NOTE(review): the extraction dropped interior lines (braces, some
   conditions and returns); the visible text is not the complete body.  */
7211 output_call (rtx insn, rtx call_dest, int sibcall)
7213 int delay_insn_deleted = 0;
7214 int delay_slot_filled = 0;
7215 int seq_length = dbr_sequence_length ();
7216 tree call_decl = SYMBOL_REF_DECL (call_dest);
7217 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7220 xoperands[0] = call_dest;
7222 /* Handle the common case where we're sure that the branch will reach
7223 the beginning of the "$CODE$" subspace. This is the beginning of
7224 the current function if we are in a named section. */
7225 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7227 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7228 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7232 if (TARGET_64BIT && !local_call)
7234 /* ??? As far as I can tell, the HP linker doesn't support the
7235 long pc-relative sequence described in the 64-bit runtime
7236 architecture. So, we use a slightly longer indirect call. */
7237 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7239 xoperands[0] = p->internal_label;
7240 xoperands[1] = gen_label_rtx ();
7242 /* If this isn't a sibcall, we put the load of %r27 into the
7243 delay slot. We can't do this in a sibcall as we don't
7244 have a second call-clobbered scratch register available. */
7246 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
/* Emit the delay-slot insn ahead of the call sequence ...  */
7249 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7250 optimize, 0, 0, NULL);
7252 /* Now delete the delay insn. */
7253 PUT_CODE (NEXT_INSN (insn), NOTE);
7254 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7255 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7256 delay_insn_deleted = 1;
7259 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7260 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7261 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7265 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7266 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7267 output_asm_insn ("bve (%%r1)", xoperands);
7271 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7272 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7273 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7274 delay_slot_filled = 1;
7279 int indirect_call = 0;
7281 /* Emit a long call. There are several different sequences
7282 of increasing length and complexity. In most cases,
7283 they don't allow an instruction in the delay slot. */
7284 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7285 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7286 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7291 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7293 && (!TARGET_PA_20 || indirect_call))
7295 /* A non-jump insn in the delay slot. By definition we can
7296 emit this insn before the call (and in fact before argument
7298 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0,
7301 /* Now delete the delay insn. */
7302 PUT_CODE (NEXT_INSN (insn), NOTE);
7303 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7304 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7305 delay_insn_deleted = 1;
7308 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7310 /* This is the best sequence for making long calls in
7311 non-pic code. Unfortunately, GNU ld doesn't provide
7312 the stub needed for external calls, and GAS's support
7313 for this with the SOM linker is buggy. It is safe
7314 to use this for local calls. */
7315 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7317 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7321 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7324 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7326 output_asm_insn ("copy %%r31,%%r2", xoperands);
7327 delay_slot_filled = 1;
7332 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7333 || (TARGET_64BIT && !TARGET_GAS))
7335 /* The HP assembler and linker can handle relocations
7336 for the difference of two symbols. GAS and the HP
7337 linker can't do this when one of the symbols is
7339 xoperands[1] = gen_label_rtx ();
7340 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7341 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7342 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7343 CODE_LABEL_NUMBER (xoperands[1]));
7344 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7346 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7348 /* GAS currently can't generate the relocations that
7349 are needed for the SOM linker under HP-UX using this
7350 sequence. The GNU linker doesn't generate the stubs
7351 that are needed for external calls on TARGET_ELF32
7352 with this sequence. For now, we have to use a
7353 longer plabel sequence when using GAS. */
7354 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7355 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7357 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7362 /* Emit a long plabel-based call sequence. This is
7363 essentially an inline implementation of $$dyncall.
7364 We don't actually try to call $$dyncall as this is
7365 as difficult as calling the function itself. */
7366 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7368 xoperands[0] = p->internal_label;
7369 xoperands[1] = gen_label_rtx ();
7371 /* Since the call is indirect, FP arguments in registers
7372 need to be copied to the general registers. Then, the
7373 argument relocation stub will copy them back. */
7375 copy_fp_args (insn);
7379 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7380 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7381 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7385 output_asm_insn ("addil LR'%0-$global$,%%r27",
7387 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
/* Plabel bit test: if bit 30 is set the plabel is a pointer to a function
   descriptor; mask it off and load the real target and new GP.  */
7391 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7392 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7393 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7394 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7396 if (!sibcall && !TARGET_PA_20)
7398 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7399 if (TARGET_NO_SPACE_REGS)
7400 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7402 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7409 output_asm_insn ("bve (%%r1)", xoperands);
7414 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7415 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7416 delay_slot_filled = 1;
7419 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7424 if (!TARGET_NO_SPACE_REGS)
7425 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7430 if (TARGET_NO_SPACE_REGS)
7431 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7433 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7437 if (TARGET_NO_SPACE_REGS)
7438 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7440 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7443 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7445 output_asm_insn ("copy %%r31,%%r2", xoperands);
7446 delay_slot_filled = 1;
/* Pad with a nop when no real insn occupies the delay slot.  */
7453 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7454 output_asm_insn ("nop", xoperands);
7456 /* We are done if there isn't a jump in the delay slot. */
7458 || delay_insn_deleted
7459 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7462 /* A sibcall should never have a branch in the delay slot. */
7466 /* This call has an unconditional jump in its delay slot. */
7467 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7469 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7471 /* See if the return address can be adjusted. Use the containing
7472 sequence insn's address. */
7473 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7474 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7475 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7477 if (VAL_14_BITS_P (distance))
7479 xoperands[1] = gen_label_rtx ();
/* Redirect the return address (%r2) straight to the jump target instead
   of emitting a separate branch.  */
7480 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7481 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7482 CODE_LABEL_NUMBER (xoperands[1]));
7485 output_asm_insn ("nop\n\tb,n %0", xoperands);
7488 output_asm_insn ("b,n %0", xoperands);
7490 /* Delete the jump. */
7491 PUT_CODE (NEXT_INSN (insn), NOTE);
7492 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7493 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7498 /* Return the attribute length of the indirect call instruction INSN.
7499 The length must match the code generated by output_indirect call.
7500 The returned length includes the delay slot. Currently, the delay
7501 slot of an indirect call sequence is not exposed and it is used by
7502 the sequence itself. */
/* Return the attribute length (bytes) of indirect call INSN, matching the
   sequences emitted by output_indirect_call; includes the delay slot.
   NOTE(review): interior lines (returns, braces) were dropped by the
   extraction; the visible text is not the complete body.  */
7505 attr_length_indirect_call (rtx insn)
/* -1 == "unknown/maximal distance" when addresses aren't laid out yet.  */
7507 unsigned long distance = -1;
7508 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7510 if (INSN_ADDRESSES_SET_P ())
7512 distance = (total + insn_current_reference_address (insn));
7513 if (distance < total)
/* Short sequence when $$dyncall is reachable or on fast-call targets.  */
7520 if (TARGET_FAST_INDIRECT_CALLS
7521 || (!TARGET_PORTABLE_RUNTIME
7522 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7528 if (TARGET_PORTABLE_RUNTIME)
7531 /* Out of reach, can use ble. */
/* Emit the assembly for an indirect call through CALL_DEST (a register)
   from INSN.  The sequence chosen must agree byte-for-byte in length with
   attr_length_indirect_call.  Returns an assembler template string for the
   short cases; emits directly for the long PIC case.
   NOTE(review): extraction dropped interior lines (braces, 64-bit guard,
   final return); the visible text is not the complete body.  */
7536 output_indirect_call (rtx insn, rtx call_dest)
/* 64-bit ABI: call through the function descriptor at CALL_DEST.  */
7542 xoperands[0] = call_dest;
7543 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7544 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7548 /* First the special case for kernels, level 0 systems, etc. */
7549 if (TARGET_FAST_INDIRECT_CALLS)
7550 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7552 /* Now the normal case -- we can reach $$dyncall directly or
7553 we're sure that we can get there via a long-branch stub.
7555 No need to check target flags as the length uniquely identifies
7556 the remaining cases. */
7557 if (attr_length_indirect_call (insn) == 8)
7558 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7560 /* Long millicode call, but we are not generating PIC or portable runtime
7562 if (attr_length_indirect_call (insn) == 12)
7563 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7565 /* Long millicode call for portable runtime. */
7566 if (attr_length_indirect_call (insn) == 20)
7567 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7569 /* We need a long PIC call to $$dyncall. */
7570 xoperands[0] = NULL_RTX;
7571 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7572 if (TARGET_SOM || !TARGET_GAS)
7574 xoperands[0] = gen_label_rtx ();
7575 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7576 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7577 CODE_LABEL_NUMBER (xoperands[0]));
7578 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7582 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7583 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7586 output_asm_insn ("blr %%r0,%%r2", xoperands);
7587 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7591 /* Return the total length of the save and restore instructions needed for
7592 the data linkage table pointer (i.e., the PIC register) across the call
7593 instruction INSN. No-return calls do not require a save and restore.
7594 In addition, we may be able to avoid the save and restore for calls
7595 within the same translation unit. */
/* Return the total length (bytes) of the save and restore of the DLT
   pointer (%r19, the PIC register) around call INSN; zero for no-return
   calls (per the REG_NORETURN check below).
   NOTE(review): the body after the no-return check was dropped by the
   extraction.  */
7598 attr_length_save_restore_dltp (rtx insn)
7600 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7606 /* In HPUX 8.0's shared library scheme, special relocations are needed
7607 for function labels if they might be passed to a function
7608 in a shared library (because shared libraries don't live in code
7609 space), and special magic is needed to construct their address. */
/* Mark the SYMBOL_REF SYM as a function label by rewriting its name with
   a special prefix, then re-interning the string in GC storage.
   NOTE(review): the lines that build NEWSTR from STR were dropped by the
   extraction; presumably they prepend the marker character.  */
7612 hppa_encode_label (rtx sym)
7614 const char *str = XSTR (sym, 0);
7615 int len = strlen (str) + 1;
7618 p = newstr = alloca (len + 1);
7622 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* Target hook: record section info for DECL on its RTL.  On the first
   encoding of a text-space symbol, set SYMBOL_REF_FLAG, and for function
   decls also apply the function-label name encoding.  */
7626 pa_encode_section_info (tree decl, rtx rtl, int first)
7628 if (first && TEXT_SPACE_P (decl))
7630 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7631 if (TREE_CODE (decl) == FUNCTION_DECL)
7632 hppa_encode_label (XEXP (rtl, 0));
7636 /* This is sort of inverse to pa_encode_section_info. */
/* Inverse of pa_encode_section_info: skip a leading '@' and/or '*'
   marker and return the user-visible symbol name.  */
7639 pa_strip_name_encoding (const char *str)
7641 str += (*str == '@');
7642 str += (*str == '*');
/* Predicate: nonzero if OP is a SYMBOL_REF whose (encoded) name marks it
   as a function label.  MODE is ignored.  */
7647 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7649 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7652 /* Returns 1 if OP is a function label involved in a simple addition
7653 with a constant. Used to keep certain patterns from matching
7654 during instruction combination. */
/* Return nonzero if OP is (optionally CONST-wrapped) a PLUS of a function
   label and an integer constant; used to reject such patterns in combine.
   NOTE(review): the line that unwraps the CONST was dropped by the
   extraction.  */
7656 is_function_label_plus_const (rtx op)
7658 /* Strip off any CONST. */
7659 if (GET_CODE (op) == CONST)
7662 return (GET_CODE (op) == PLUS
7663 && function_label_operand (XEXP (op, 0), Pmode)
7664 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7667 /* Output assembly code for a thunk to FUNCTION. */
/* Target hook: write the assembly for a `this'-adjusting thunk for
   FUNCTION to FILE.  DELTA is added to the incoming `this' (%r26) before
   branching to FUNCTION.  VCALL_OFFSET is unused on this port.  The many
   branches below select a call sequence by target flags, PIC-ness,
   section placement and whether DELTA fits in 14 bits (VAL_14).
   NOTE(review): the extraction dropped interior lines (braces, some
   fprintf continuations, declarations of label/lab/nbytes); the visible
   text is not the complete body.  */
7670 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7671 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7674 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7675 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7676 int val_14 = VAL_14_BITS_P (delta);
7678 static unsigned int current_thunk_number;
7681 ASM_OUTPUT_LABEL (file, tname);
7682 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7684 fname = (*targetm.strip_name_encoding) (fname);
7685 tname = (*targetm.strip_name_encoding) (tname);
7687 /* Output the thunk. We know that the function is in the same
7688 translation unit (i.e., the same space) as the thunk, and that
7689 thunks are output after their method. Thus, we don't need an
7690 external branch to reach the function. With SOM and GAS,
7691 functions and thunks are effectively in different sections.
7692 Thus, we can always use a IA-relative branch and the linker
7693 will add a long branch stub if necessary.
7695 However, we have to be careful when generating PIC code on the
7696 SOM port to ensure that the sequence does not transfer to an
7697 import stub for the target function as this could clobber the
7698 return value saved at SP-24. This would also apply to the
7699 32-bit linux port if the multi-space model is implemented. */
7700 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7701 && !(flag_pic && TREE_PUBLIC (function))
7702 && (TARGET_GAS || last_address < 262132))
7703 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7704 && ((targetm.have_named_sections
7705 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7706 /* The GNU 64-bit linker has rather poor stub management.
7707 So, we use a long branch from thunks that aren't in
7708 the same section as the target function. */
7710 && (DECL_SECTION_NAME (thunk_fndecl)
7711 != DECL_SECTION_NAME (function)))
7712 || ((DECL_SECTION_NAME (thunk_fndecl)
7713 == DECL_SECTION_NAME (function))
7714 && last_address < 262132)))
7715 || (!targetm.have_named_sections && last_address < 262132))))
/* Short case: direct branch, adjusting `this' in the delay slot.  */
7719 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7720 "(%%r26),%%r26\n", fname, delta);
7725 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7727 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7728 "(%%r1),%%r26\n", fname, delta);
7732 else if (TARGET_64BIT)
7734 /* We only have one call-clobbered scratch register, so we can't
7735 make use of the delay slot if delta doesn't fit in 14 bits. */
7737 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7738 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7739 "(%%r1),%%r26\n", delta, delta);
7741 fprintf (file, "\tb,l .+8,%%r1\n");
7745 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7746 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7750 int off = val_14 ? 8 : 16;
7751 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7752 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7757 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7758 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7763 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7767 else if (TARGET_PORTABLE_RUNTIME)
7769 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7770 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7774 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7775 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7780 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7782 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7783 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7787 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7789 /* The function is accessible from outside this module. The only
7790 way to avoid an import stub between the thunk and function is to
7791 call the function directly with an indirect sequence similar to
7792 that used by $$dyncall. This is possible because $$dyncall acts
7793 as the import stub in an indirect call. */
7796 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7797 lab = (*targetm.strip_name_encoding) (label);
7799 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7800 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7801 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
/* Plabel bit test as in $$dyncall: bit 30 set means a pointer to a
   function descriptor; mask and reload target and GP.  */
7802 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7803 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7804 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7805 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7808 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7814 fprintf (file, "\tbve (%%r22)\n\tldo ");
7819 if (TARGET_NO_SPACE_REGS)
7821 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7826 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7827 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7828 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7834 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7836 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7841 fprintf (file, "\tb,l .+8,%%r1\n");
7843 fprintf (file, "\tbl .+8,%%r1\n");
7845 if (TARGET_SOM || !TARGET_GAS)
7847 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7848 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7852 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7853 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7858 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7859 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7864 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7866 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7867 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7874 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7876 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7877 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7881 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7886 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7891 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
/* For the SOM PIC indirect case, emit the plabel word referenced above.  */
7893 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7896 fprintf (file, "\t.align 4\n");
7897 ASM_OUTPUT_LABEL (file, label);
7898 fprintf (file, "\t.word P'%s\n", fname);
7899 function_section (thunk_fndecl);
/* Round the thunk size up to the function alignment and account for it
   in the running code-size totals.  */
7902 current_thunk_number++;
7903 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7904 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7905 last_address += nbytes;
7906 update_total_code_bytes (nbytes);
7909 /* Only direct calls to static functions are allowed to be sibling (tail)
7912 This restriction is necessary because some linker generated stubs will
7913 store return pointers into rp' in some cases which might clobber a
7914 live value already in rp'.
7916 In a sibcall the current function and the target function share stack
7917 space. Thus if the path to the current function and the path to the
7918 target function save a value in rp', they save the value into the
7919 same stack slot, which has undesirable consequences.
7921 Because of the deferred binding nature of shared libraries any function
7922 with external scope could be in a different load module and thus require
7923 rp' to be saved when calling that function. So sibcall optimizations
7924 can only be safe for static functions.
7926 Note that GCC never needs return value relocations, so we don't have to
7927 worry about static calls with return value relocations (which require
7930 It is safe to perform a sibcall optimization when the target function
7931 will never return. */
/* Target hook: return nonzero if a sibling (tail) call to DECL is safe.
   Only direct calls to non-public (static) functions qualify on most
   configurations; see the preceding comment block for the rationale.
   NOTE(review): the extraction dropped surrounding #ifdef/condition
   lines (e.g. the TARGET_ELF32/TARGET_64BIT guards and the full return
   expression); the visible text is not the complete body.  */
7933 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7935 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7936 single subspace mode and the call is not indirect. As far as I know,
7937 there is no operating system support for the multiple subspace mode.
7938 It might be possible to support indirect calls if we didn't use
7939 $$dyncall (see the indirect sequence generated in output_call). */
7941 return (decl != NULL_TREE);
7943 /* Sibcalls are not ok because the arg pointer register is not a fixed
7944 register. This prevents the sibcall optimization from occurring. In
7945 addition, there are problems with stub placement using GNU ld. This
7946 is because a normal sibcall branch uses a 17-bit relocation while
7947 a regular call branch uses a 22-bit relocation. As a result, more
7948 care needs to be taken in the placement of long-branch stubs. */
7953 && !TARGET_PORTABLE_RUNTIME
7954 && !TREE_PUBLIC (decl));
7957 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7958 use in fmpyadd instructions. */
/* Return 1 if the six OPERANDS (multiply dest/src1/src2, add dest/src1/
   src2) are suitable for a combined fmpyadd instruction: one FP mode
   throughout, registers only, the add sharing an input with its output,
   and no data dependence between the multiply and the add.  */
7960 fmpyaddoperands (rtx *operands)
7962 enum machine_mode mode = GET_MODE (operands[0]);
7964 /* Must be a floating point mode. */
7965 if (mode != SFmode && mode != DFmode)
7968 /* All modes must be the same. */
7969 if (! (mode == GET_MODE (operands[1])
7970 && mode == GET_MODE (operands[2])
7971 && mode == GET_MODE (operands[3])
7972 && mode == GET_MODE (operands[4])
7973 && mode == GET_MODE (operands[5])))
7976 /* All operands must be registers. */
7977 if (! (GET_CODE (operands[1]) == REG
7978 && GET_CODE (operands[2]) == REG
7979 && GET_CODE (operands[3]) == REG
7980 && GET_CODE (operands[4]) == REG
7981 && GET_CODE (operands[5]) == REG))
7984 /* Only 2 real operands to the addition. One of the input operands must
7985 be the same as the output operand. */
7986 if (! rtx_equal_p (operands[3], operands[4])
7987 && ! rtx_equal_p (operands[3], operands[5]))
7990 /* Inout operand of add can not conflict with any operands from multiply. */
7991 if (rtx_equal_p (operands[3], operands[0])
7992 || rtx_equal_p (operands[3], operands[1])
7993 || rtx_equal_p (operands[3], operands[2]))
7996 /* multiply can not feed into addition operands. */
7997 if (rtx_equal_p (operands[4], operands[0])
7998 || rtx_equal_p (operands[5], operands[0]))
8001 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8003 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8004 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8005 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8006 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8007 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8008 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8011 /* Passed. Operands are suitable for fmpyadd. */
8015 #if !defined(USE_COLLECT2)
/* Emit a global constructor entry for SYMBOL at PRIORITY (used when
   collect2 is not in charge).  Function labels get the HPPA name encoding
   first so relocations are correct; the target's usual .ctors mechanism
   does the rest.  */
8017 pa_asm_out_constructor (rtx symbol, int priority)
8019 if (!function_label_operand (symbol, VOIDmode))
8020 hppa_encode_label (symbol);
8022 #ifdef CTORS_SECTION_ASM_OP
8023 default_ctor_section_asm_out_constructor (symbol, priority);
8025 # ifdef TARGET_ASM_NAMED_SECTION
8026 default_named_section_asm_out_constructor (symbol, priority);
8028 default_stabs_asm_out_constructor (symbol, priority);
/* Emit a global destructor entry for SYMBOL at PRIORITY; mirror image of
   pa_asm_out_constructor using the .dtors mechanisms.  */
8034 pa_asm_out_destructor (rtx symbol, int priority)
8036 if (!function_label_operand (symbol, VOIDmode))
8037 hppa_encode_label (symbol);
8039 #ifdef DTORS_SECTION_ASM_OP
8040 default_dtor_section_asm_out_destructor (symbol, priority);
8042 # ifdef TARGET_ASM_NAMED_SECTION
8043 default_named_section_asm_out_destructor (symbol, priority);
8045 default_stabs_asm_out_destructor (symbol, priority);
8051 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8052 use in fmpysub instructions. */
/* Return 1 if the six OPERANDS are suitable for a combined fmpysub
   instruction.  Like fmpyaddoperands, but subtraction is not commutative,
   so the shared operand must be operands[3] == operands[4].  */
8054 fmpysuboperands (rtx *operands)
8056 enum machine_mode mode = GET_MODE (operands[0]);
8058 /* Must be a floating point mode. */
8059 if (mode != SFmode && mode != DFmode)
8062 /* All modes must be the same. */
8063 if (! (mode == GET_MODE (operands[1])
8064 && mode == GET_MODE (operands[2])
8065 && mode == GET_MODE (operands[3])
8066 && mode == GET_MODE (operands[4])
8067 && mode == GET_MODE (operands[5])))
8070 /* All operands must be registers. */
8071 if (! (GET_CODE (operands[1]) == REG
8072 && GET_CODE (operands[2]) == REG
8073 && GET_CODE (operands[3]) == REG
8074 && GET_CODE (operands[4]) == REG
8075 && GET_CODE (operands[5]) == REG))
8078 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8079 operation, so operands[4] must be the same as operand[3]. */
8080 if (! rtx_equal_p (operands[3], operands[4]))
8083 /* multiply can not feed into subtraction. */
8084 if (rtx_equal_p (operands[5], operands[0]))
8087 /* Inout operand of sub can not conflict with any operands from multiply. */
8088 if (rtx_equal_p (operands[3], operands[0])
8089 || rtx_equal_p (operands[3], operands[1])
8090 || rtx_equal_p (operands[3], operands[2]))
8093 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8095 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8096 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8097 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8098 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8099 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8100 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8103 /* Passed. Operands are suitable for fmpysub. */
/* Predicate: nonzero if OP is a PLUS, XOR or IOR rtx.  MODE is ignored.  */
8108 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8110 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
8111 || GET_CODE (op) == IOR);
8114 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8115 constants for shadd instructions. */
/* Return 1 if VAL is 2, 4 or 8 -- the shift amounts encodable by the
   sh?add instructions.  (The returns were dropped by the extraction.)  */
8117 shadd_constant_p (int val)
8119 if (val == 2 || val == 4 || val == 8)
8125 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
8126 the valid constant for shadd instructions. */
/* Predicate: nonzero if OP is a CONST_INT valid for sh?add (2, 4 or 8).  */
8128 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8130 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
8133 /* Return 1 if OP is valid as a base or index register in a
/* Predicate: nonzero if OP may serve as the base or index register of an
   indexed (ldwx-style) address.  Rejects virtual registers (they can only
   instantiate to REG or REG+CONST) and, pre-reload, a frame pointer that
   is likely to be eliminated.  */
8137 borx_reg_operand (rtx op, enum machine_mode mode)
8139 if (GET_CODE (op) != REG)
8142 /* We must reject virtual registers as the only expressions that
8143 can be instantiated are REG and REG+CONST. */
8144 if (op == virtual_incoming_args_rtx
8145 || op == virtual_stack_vars_rtx
8146 || op == virtual_stack_dynamic_rtx
8147 || op == virtual_outgoing_args_rtx
8148 || op == virtual_cfa_rtx)
8151 /* While it's always safe to index off the frame pointer, it's not
8152 profitable to do so when the frame pointer is being eliminated. */
8153 if (!reload_completed
8154 && flag_omit_frame_pointer
8155 && !current_function_calls_alloca
8156 && op == frame_pointer_rtx)
8159 return register_operand (op, mode);
8162 /* Return 1 if this operand is anything other than a hard register. */
/* Predicate: nonzero unless OP is a hard (physical) register.  */
8165 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8167 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8170 /* Return 1 if INSN branches forward. Should be using insn_addresses
8171 to avoid walking through all the insns... */
/* Return 1 if jump INSN branches forward, by walking NEXT_INSN until the
   label is found (or the chain ends).  O(n); the comment above notes
   insn_addresses would be cheaper.
   NOTE(review): the loop header lines were dropped by the extraction.  */
8173 forward_branch_p (rtx insn)
8175 rtx label = JUMP_LABEL (insn);
8182 insn = NEXT_INSN (insn);
8185 return (insn == label);
8188 /* Return 1 if OP is an equality comparison, else return 0. */
/* Predicate: nonzero if OP is an EQ or NE comparison.  MODE is ignored.  */
8190 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8192 return (GET_CODE (op) == EQ || GET_CODE (op) == NE)
8195 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
/* Predicate: nonzero if OP is a comparison usable by the movb
   instruction (EQ, NE, LT or GE).  MODE is ignored.  */
8197 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8199 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
8200 || GET_CODE (op) == LT || GET_CODE (op) == GE);
8203 /* Return 1 if INSN is in the delay slot of a call instruction. */
/* Return 1 if jump INSN sits in the delay slot of a call: i.e. it is the
   second element of the SEQUENCE two insns back.  */
8205 jump_in_call_delay (rtx insn)
8208 if (GET_CODE (insn) != JUMP_INSN)
8211 if (PREV_INSN (insn)
8212 && PREV_INSN (PREV_INSN (insn))
8213 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8215 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8217 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8218 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8225 /* Output an unconditional move and branch insn. */
/* Emit a combined unconditional move-and-branch.  LENGTH selects the
   template: the short form uses movb,tr; longer forms fall back to a
   branch plus a copy/ldi, ordered by what the delay slot allows.
   NOTE(review): the LENGTH guard lines were dropped by the extraction.  */
8228 output_parallel_movb (rtx *operands, int length)
8230 /* These are the cases in which we win. */
8232 return "mov%I1b,tr %1,%0,%2";
8234 /* None of these cases wins, but they don't lose either. */
8235 if (dbr_sequence_length () == 0)
8237 /* Nothing in the delay slot, fake it by putting the combined
8238 insn (the copy or add) in the delay slot of a bl. */
8239 if (GET_CODE (operands[1]) == CONST_INT)
8240 return "b %2\n\tldi %1,%0";
8242 return "b %2\n\tcopy %1,%0";
8246 /* Something in the delay slot, but we've got a long branch. */
8247 if (GET_CODE (operands[1]) == CONST_INT)
8248 return "ldi %1,%0\n\tb %2";
8250 return "copy %1,%0\n\tb %2";
8254 /* Output an unconditional add and branch insn. */
/* Emit a combined unconditional add-and-branch; analogue of
   output_parallel_movb for addb.  Normalizes the operands so that
   operands[0] is the in/out operand and operands[1] is read-only.
   NOTE(review): the LENGTH guard lines were dropped by the extraction.  */
8257 output_parallel_addb (rtx *operands, int length)
8259 /* To make life easy we want operand0 to be the shared input/output
8260 operand and operand1 to be the readonly operand. */
8261 if (operands[0] == operands[1])
8262 operands[1] = operands[2];
8264 /* These are the cases in which we win. */
8266 return "add%I1b,tr %1,%0,%3";
8268 /* None of these cases win, but they don't lose either. */
8269 if (dbr_sequence_length () == 0)
8271 /* Nothing in the delay slot, fake it by putting the combined
8272 insn (the copy or add) in the delay slot of a bl. */
8273 return "b %3\n\tadd%I1 %1,%0,%0";
8277 /* Something in the delay slot, but we've got a long branch. */
8278 return "add%I1 %1,%0,%0\n\tb %3";
8282 /* Return nonzero if INSN (a jump insn) immediately follows a call
8283 to a named function. This is used to avoid filling the delay slot
8284 of the jump since it can usually be eliminated by modifying RP in
8285 the delay slot of the call. */
/* Return nonzero if INSN immediately follows a call (CALL_INSN, or an
   INSN that is a millicode call) to a named function; used to leave the
   jump's delay slot empty so RP can be adjusted instead.
   NOTE(review): the final returns were dropped by the extraction.  */
8288 following_call (rtx insn)
8290 if (! TARGET_JUMP_IN_DELAY)
8293 /* Find the previous real insn, skipping NOTEs. */
8294 insn = PREV_INSN (insn);
8295 while (insn && GET_CODE (insn) == NOTE)
8296 insn = PREV_INSN (insn);
8298 /* Check for CALL_INSNs and millicode calls. */
8300 && ((GET_CODE (insn) == CALL_INSN
8301 && get_attr_type (insn) != TYPE_DYNCALL)
8302 || (GET_CODE (insn) == INSN
8303 && GET_CODE (PATTERN (insn)) != SEQUENCE
8304 && GET_CODE (PATTERN (insn)) != USE
8305 && GET_CODE (PATTERN (insn)) != CLOBBER
8306 && get_attr_type (insn) == TYPE_MILLI)))
8312 /* We use this hook to perform a PA specific optimization which is difficult
8313 to do in earlier passes.
8315 We want the delay slots of branches within jump tables to be filled.
8316 None of the compiler passes at the moment even has the notion that a
8317 PA jump table doesn't contain addresses, but instead contains actual
8320 Because we actually jump into the table, the addresses of each entry
8321 must stay constant in relation to the beginning of the table (which
8322 itself must stay constant relative to the instruction to jump into
8323 it). I don't believe we can guarantee earlier passes of the compiler
8324 will adhere to those rules.
8326 So, late in the compilation process we find all the jump tables, and
8327 expand them into real code -- eg each entry in the jump table vector
8328 will get an appropriate label followed by a jump to the final target.
8330 Reorg and the final jump pass can then optimize these branches and
8331 fill their delay slots. We end up with smaller, more efficient code.
8333 The jump instructions within the table are special; we must be able
8334 to identify them during assembly output (if the jumps don't get filled
8335 we need to emit a nop rather than nullifying the delay slot). We
8336 identify jumps in switch tables by using insns with the attribute
8337 type TYPE_BTABLE_BRANCH.
8339 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8340 insns. This serves two purposes, first it prevents jump.c from
8341 noticing that the last N entries in the table jump to the instruction
8342 immediately after the table and deleting the jumps. Second, those
8343 insns mark where we should emit .begin_brtab and .end_brtab directives
8344 when using GAS (allows for better link time optimizations). */
/* Body of the PA machine-dependent reorg pass described in the comment
   above (the function header line is not visible in this listing).
   It (1) removes useless add/trap insns, (2) runs the instruction
   combination pass on pre-PA8000 processors, and (3) explodes jump
   tables into real branches, or at minimum brackets them with
   begin_brtab/end_brtab markers.  */
8351 remove_useless_addtr_insns (1);
/* Combining two insns into one only pays off on first-generation
   machines; see the comment above pa_combine_instructions.  */
8353 if (pa_cpu < PROCESSOR_8000)
8354 pa_combine_instructions ();
8357 /* This is fairly cheap, so always run it if optimizing. */
8358 if (optimize > 0 && !TARGET_BIG_SWITCH)
8360 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8361 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8363 rtx pattern, tmp, location, label;
8364 unsigned int length, i;
8366 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8367 if (GET_CODE (insn) != JUMP_INSN
8368 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8369 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8372 /* Emit marker for the beginning of the branch table. */
8373 emit_insn_before (gen_begin_brtab (), insn);
8375 pattern = PATTERN (insn);
8376 location = PREV_INSN (insn);
/* The vector of labels lives in operand 0 of an ADDR_VEC but operand 1
   of an ADDR_DIFF_VEC (operand 0 there is the base label ref).  */
8377 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8379 for (i = 0; i < length; i++)
8381 /* Emit a label before each jump to keep jump.c from
8382 removing this code. */
8383 tmp = gen_label_rtx ();
8384 LABEL_NUSES (tmp) = 1;
8385 emit_label_after (tmp, location);
8386 location = NEXT_INSN (location);
8388 if (GET_CODE (pattern) == ADDR_VEC)
8389 label = XEXP (XVECEXP (pattern, 0, i), 0);
8391 label = XEXP (XVECEXP (pattern, 1, i), 0);
/* Each table entry becomes an explicit short jump to its target.  */
8393 tmp = gen_short_jump (label);
8395 /* Emit the jump itself. */
8396 tmp = emit_jump_insn_after (tmp, location);
8397 JUMP_LABEL (tmp) = label;
8398 LABEL_NUSES (label)++;
8399 location = NEXT_INSN (location);
8401 /* Emit a BARRIER after the jump. */
8402 emit_barrier_after (location);
8403 location = NEXT_INSN (location);
8406 /* Emit marker for the end of the branch table. */
8407 emit_insn_before (gen_end_brtab (), location);
8408 location = NEXT_INSN (location);
8409 emit_barrier_after (location);
8411 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
/* The else-branch below runs when the tables were NOT exploded
   (no optimization, or TARGET_BIG_SWITCH); the tables are kept but
   still need the brtab markers for the assembler.  */
8417 /* Still need brtab marker insns. FIXME: the presence of these
8418 markers disables output of the branch table to readonly memory,
8419 and any alignment directives that might be needed. Possibly,
8420 the begin_brtab insn should be output before the label for the
8421 table. This doesn't matter at the moment since the tables are
8422 always output in the text section. */
8423 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8425 /* Find an ADDR_VEC insn. */
8426 if (GET_CODE (insn) != JUMP_INSN
8427 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8428 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8431 /* Now generate markers for the beginning and end of the
8433 emit_insn_before (gen_begin_brtab (), insn);
8434 emit_insn_after (gen_end_brtab (), insn);
8439 /* The PA has a number of odd instructions which can perform multiple
8440 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8441 it may be profitable to combine two instructions into one instruction
8442 with two outputs. It's not profitable PA2.0 machines because the
8443 two outputs would take two slots in the reorder buffers.
8445 This routine finds instructions which can be combined and combines
8446 them. We only support some of the potential combinations, and we
8447 only try common ways to find suitable instructions.
8449 * addb can add two registers or a register and a small integer
8450 and jump to a nearby (+-8k) location. Normally the jump to the
8451 nearby location is conditional on the result of the add, but by
8452 using the "true" condition we can make the jump unconditional.
8453 Thus addb can perform two independent operations in one insn.
8455 * movb is similar to addb in that it can perform a reg->reg
8456 or small immediate->reg copy and jump to a nearby (+-8k location).
8458 * fmpyadd and fmpysub can perform a FP multiply and either an
8459 FP add or FP sub if the operands of the multiply and add/sub are
8460 independent (there are other minor restrictions). Note both
8461 the fmpy and fadd/fsub can in theory move to better spots according
8462 to data dependencies, but for now we require the fmpy stay at a
8465 * Many of the memory operations can perform pre & post updates
8466 of index registers. GCC's pre/post increment/decrement addressing
8467 is far too simple to take advantage of all the possibilities. This
8468 pass may not be suitable since those insns may not be independent.
8470 * comclr can compare two ints or an int and a register, nullify
8471 the following instruction and zero some other register. This
8472 is more difficult to use as it's harder to find an insn which
8473 will generate a comclr than finding something like an unconditional
8474 branch. (conditional moves & long branches create comclr insns).
8476 * Most arithmetic operations can conditionally skip the next
8477 instruction. They can be viewed as "perform this operation
8478 and conditionally jump to this nearby location" (where nearby
8479 is an insns away). These are difficult to use due to the
8480 branch length restrictions. */
/* Combine pairs of independent insns (fmpy+fadd/fsub -> fmpyadd/fmpysub,
   add/move + unconditional branch -> addb/movb) as described in the
   comment above.  NOTE(review): several physical lines (declarations of
   ANCHOR/FLOATER/NEW, braces, continue/break statements) are missing
   from this listing.  */
8483 pa_combine_instructions (void)
8487 /* This can get expensive since the basic algorithm is on the
8488 order of O(n^2) (or worse). Only do it for -O2 or higher
8489 levels of optimization. */
8493 /* Walk down the list of insns looking for "anchor" insns which
8494 may be combined with "floating" insns. As the name implies,
8495 "anchor" instructions don't move, while "floating" insns may
/* Build one scratch PARALLEL insn up front; pa_can_combine_p reuses it
   for every recognition attempt instead of allocating a new insn each
   time (it overwrites XVECEXP (PATTERN (new), 0, 0/1)).  */
8497 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8498 new = make_insn_raw (new);
8500 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8502 enum attr_pa_combine_type anchor_attr;
8503 enum attr_pa_combine_type floater_attr;
8505 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8506 Also ignore any special USE insns. */
8507 if ((GET_CODE (anchor) != INSN
8508 && GET_CODE (anchor) != JUMP_INSN
8509 && GET_CODE (anchor) != CALL_INSN)
8510 || GET_CODE (PATTERN (anchor)) == USE
8511 || GET_CODE (PATTERN (anchor)) == CLOBBER
8512 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8513 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8516 anchor_attr = get_attr_pa_combine_type (anchor);
8517 /* See if anchor is an insn suitable for combination. */
/* Only backward (non-forward) unconditional branches are candidates for
   the addb/movb transformation.  */
8518 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8519 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8520 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8521 && ! forward_branch_p (anchor)))
/* First scan BACKWARD from the anchor for a suitable floater.  */
8525 for (floater = PREV_INSN (anchor);
8527 floater = PREV_INSN (floater))
8529 if (GET_CODE (floater) == NOTE
8530 || (GET_CODE (floater) == INSN
8531 && (GET_CODE (PATTERN (floater)) == USE
8532 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8535 /* Anything except a regular INSN will stop our search. */
8536 if (GET_CODE (floater) != INSN
8537 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8538 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8544 /* See if FLOATER is suitable for combination with the
8546 floater_attr = get_attr_pa_combine_type (floater);
8547 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8548 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8549 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8550 && floater_attr == PA_COMBINE_TYPE_FMPY))
8552 /* If ANCHOR and FLOATER can be combined, then we're
8553 done with this pass. */
8554 if (pa_can_combine_p (new, anchor, floater, 0,
8555 SET_DEST (PATTERN (floater)),
8556 XEXP (SET_SRC (PATTERN (floater)), 0),
8557 XEXP (SET_SRC (PATTERN (floater)), 1)))
8561 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8562 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
/* An addb candidate: floater is reg+reg/imm addition.  Otherwise the
   floater is a plain copy, i.e. a movb candidate, where both "inputs"
   are the single SET_SRC.  */
8564 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8566 if (pa_can_combine_p (new, anchor, floater, 0,
8567 SET_DEST (PATTERN (floater)),
8568 XEXP (SET_SRC (PATTERN (floater)), 0),
8569 XEXP (SET_SRC (PATTERN (floater)), 1)))
8574 if (pa_can_combine_p (new, anchor, floater, 0,
8575 SET_DEST (PATTERN (floater)),
8576 SET_SRC (PATTERN (floater)),
8577 SET_SRC (PATTERN (floater))))
8583 /* If we didn't find anything on the backwards scan try forwards. */
/* Only the FP combinations are tried forward — reversed == 1 is passed
   to pa_can_combine_p below.  */
8585 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8586 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8588 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8590 if (GET_CODE (floater) == NOTE
8591 || (GET_CODE (floater) == INSN
8592 && (GET_CODE (PATTERN (floater)) == USE
8593 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8597 /* Anything except a regular INSN will stop our search. */
8598 if (GET_CODE (floater) != INSN
8599 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8600 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8606 /* See if FLOATER is suitable for combination with the
8608 floater_attr = get_attr_pa_combine_type (floater);
8609 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8610 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8611 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8612 && floater_attr == PA_COMBINE_TYPE_FMPY))
8614 /* If ANCHOR and FLOATER can be combined, then we're
8615 done with this pass. */
8616 if (pa_can_combine_p (new, anchor, floater, 1,
8617 SET_DEST (PATTERN (floater)),
8618 XEXP (SET_SRC (PATTERN (floater)),
8620 XEXP (SET_SRC (PATTERN (floater)),
8627 /* FLOATER will be nonzero if we found a suitable floating
8628 insn for combination with ANCHOR. */
8630 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8631 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8633 /* Emit the new instruction and delete the old anchor. */
8634 emit_insn_before (gen_rtx_PARALLEL
8636 gen_rtvec (2, PATTERN (anchor),
8637 PATTERN (floater))),
/* The anchor is turned into a deleted-insn NOTE in place rather than
   being unlinked from the insn chain.  */
8640 PUT_CODE (anchor, NOTE);
8641 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8642 NOTE_SOURCE_FILE (anchor) = 0;
8644 /* Emit a special USE insn for FLOATER, then delete
8645 the floating insn. */
8646 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8647 delete_insn (floater);
/* Same replacement, but for the branch case we must emit a jump insn
   and carry over JUMP_LABEL from the old anchor.  */
8652 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8655 /* Emit the new_jump instruction and delete the old anchor. */
8657 = emit_jump_insn_before (gen_rtx_PARALLEL
8659 gen_rtvec (2, PATTERN (anchor),
8660 PATTERN (floater))),
8663 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8664 PUT_CODE (anchor, NOTE);
8665 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8666 NOTE_SOURCE_FILE (anchor) = 0;
8668 /* Emit a special USE insn for FLOATER, then delete
8669 the floating insn. */
8670 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8671 delete_insn (floater);
/* Helper for pa_combine_instructions.  Return nonzero if ANCHOR and
   FLOATER may be merged into one PARALLEL insn.  NEW is the scratch
   insn reused for recognition; DEST is FLOATER's output, SRC1/SRC2 its
   inputs.  REVERSED is 0 when FLOATER precedes ANCHOR (backward scan)
   and 1 when it follows (forward scan) — presumably it selects how the
   start/end range checked below is computed; those lines (8698-8710)
   are missing from this listing.  */
8679 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8682 int insn_code_number;
8685 /* Create a PARALLEL with the patterns of ANCHOR and
8686 FLOATER, try to recognize it, then test constraints
8687 for the resulting pattern.
8689 If the pattern doesn't match or the constraints
8690 aren't met keep searching for a suitable floater
/* Splice the two patterns into the pre-allocated scratch PARALLEL and
   force re-recognition by clearing the cached insn code.  */
8692 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8693 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8694 INSN_CODE (new) = -1;
8695 insn_code_number = recog_memoized (new);
8696 if (insn_code_number < 0
8697 || (extract_insn (new), ! constrain_operands (1)))
8711 /* There's up to three operands to consider. One
8712 output and two inputs.
8714 The output must not be used between FLOATER & ANCHOR
8715 exclusive. The inputs must not be set between
8716 FLOATER and ANCHOR exclusive. */
8718 if (reg_used_between_p (dest, start, end))
8721 if (reg_set_between_p (src1, start, end))
8724 if (reg_set_between_p (src2, start, end))
8727 /* If we get here, then everything is good. */
8731 /* Return nonzero if references for INSN are delayed.
8733 Millicode insns are actually function calls with some special
8734 constraints on arguments and register usage.
8736 Millicode calls always expect their arguments in the integer argument
8737 registers, and always return their result in %r29 (ret1). They
8738 are expected to clobber their arguments, %r1, %r29, and the return
8739 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8741 This function tells reorg that the references to arguments and
8742 millicode calls do not appear to happen until after the millicode call.
8743 This allows reorg to put insns which set the argument registers into the
8744 delay slot of the millicode call -- thus they act more like traditional
8747 Note we can not consider side effects of the insn to be delayed because
8748 the branch and link insn will clobber the return pointer. If we happened
8749 to use the return pointer in the delay slot of the call, then we lose.
8751 get_attr_type will try to recognize the given insn, so make sure to
8752 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
/* Return nonzero if INSN is a millicode call: an ordinary INSN (not a
   SEQUENCE/USE/CLOBBER, which get_attr_type cannot recognize) whose
   type attribute is TYPE_MILLI.  See the long comment above for why
   reorg treats such calls' argument references as delayed.  */
8755 insn_refs_are_delayed (rtx insn)
8757 return ((GET_CODE (insn) == INSN
8758 && GET_CODE (PATTERN (insn)) != SEQUENCE
8759 && GET_CODE (PATTERN (insn)) != USE
8760 && GET_CODE (PATTERN (insn)) != CLOBBER
8761 && get_attr_type (insn) == TYPE_MILLI));
8764 /* On the HP-PA the value is found in register(s) 28(-29), unless
8765 the mode is SF or DF. Then the value is returned in fr4 (32).
8767 This must perform the same promotions as PROMOTE_MODE, else
8768 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8770 Small structures must be returned in a PARALLEL on PA64 in order
8771 to match the HP Compiler ABI. */
/* Return the RTX for the place a value of type VALTYPE is returned.
   General values come back in GR 28 (and 29), FP values in FR 4
   (register number 32); small PA64 aggregates are returned as a
   BLKmode PARALLEL of one or two DImode registers.  */
8774 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8776 enum machine_mode valmode;
8778 /* Aggregates with a size less than or equal to 128 bits are returned
8779 in GR 28(-29). They are left justified. The pad bits are undefined.
8780 Larger aggregates are returned in memory. */
8781 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
/* One DImode register for <= 8 bytes, two otherwise.  */
8785 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8787 for (i = 0; i < ub; i++)
8789 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8790 gen_rtx_REG (DImode, 28 + i),
8795 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
/* Promote sub-word integers and pointers to word_mode, matching
   PROMOTE_MODE as required by the comment above this function.  */
8798 if ((INTEGRAL_TYPE_P (valtype)
8799 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8800 || POINTER_TYPE_P (valtype))
8801 valmode = word_mode;
8803 valmode = TYPE_MODE (valtype);
/* SFmode/DFmode results go in fr4 (reg 32); TFmode and soft-float
   results use the general registers instead.  */
8805 if (TREE_CODE (valtype) == REAL_TYPE
8806 && TYPE_MODE (valtype) != TFmode
8807 && !TARGET_SOFT_FLOAT)
8808 return gen_rtx_REG (valmode, 32);
8810 return gen_rtx_REG (valmode, 28);
8813 /* Return the location of a parameter that is passed in a register or NULL
8814 if the parameter has any component that is passed in memory.
8816 This is new code and will be pushed to into the net sources after
8819 ??? We might want to restructure this so that it looks more like other
/* Return the register (or PARALLEL of registers) in which an argument
   of MODE/TYPE is passed, or presumably NULL/zero when it goes on the
   stack (several return lines are missing from this listing).  CUM
   tracks argument words already used.  NOTE(review): local declarations
   (arg_size, gpr_reg_base, fpr_reg_base, retval, alignment, loc) and
   some returns are absent from this excerpt.  */
8822 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8823 int named ATTRIBUTE_UNUSED)
/* Eight 64-bit argument slots on PA64, four 32-bit slots on PA32.  */
8825 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8832 if (mode == VOIDmode)
8835 arg_size = FUNCTION_ARG_SIZE (mode, type);
8837 /* If this arg would be passed partially or totally on the stack, then
8838 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8839 handle arguments which are split between regs and stack slots if
8840 the ABI mandates split arguments. */
8843 /* The 32-bit ABI does not split arguments. */
8844 if (cum->words + arg_size > max_arg_words)
/* 64-bit ABI: multiword args are aligned to an even slot, so an odd
   cum->words costs one padding word.  */
8850 alignment = cum->words & 1;
8851 if (cum->words + alignment >= max_arg_words)
8855 /* The 32bit ABIs and the 64bit ABIs are rather different,
8856 particularly in their handling of FP registers. We might
8857 be able to cleverly share code between them, but I'm not
8858 going to bother in the hope that splitting them up results
8859 in code that is more easily understood. */
8863 /* Advance the base registers to their current locations.
8865 Remember, gprs grow towards smaller register numbers while
8866 fprs grow to higher register numbers. Also remember that
8867 although FP regs are 32-bit addressable, we pretend that
8868 the registers are 64-bits wide. */
8869 gpr_reg_base = 26 - cum->words;
8870 fpr_reg_base = 32 + cum->words;
8872 /* Arguments wider than one word and small aggregates need special
8876 || (type && AGGREGATE_TYPE_P (type)))
8878 /* Double-extended precision (80-bit), quad-precision (128-bit)
8879 and aggregates including complex numbers are aligned on
8880 128-bit boundaries. The first eight 64-bit argument slots
8881 are associated one-to-one, with general registers r26
8882 through r19, and also with floating-point registers fr4
8883 through fr11. Arguments larger than one word are always
8884 passed in general registers.
8886 Using a PARALLEL with a word mode register results in left
8887 justified data on a big-endian target. */
8890 int i, offset = 0, ub = arg_size;
8892 /* Align the base register. */
8893 gpr_reg_base -= alignment;
/* Clamp to the registers actually remaining; the tail (if any) is
   handled by FUNCTION_ARG_PARTIAL_NREGS.  */
8895 ub = MIN (ub, max_arg_words - cum->words - alignment);
8896 for (i = 0; i < ub; i++)
8898 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8899 gen_rtx_REG (DImode, gpr_reg_base),
8905 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8910 /* If the argument is larger than a word, then we know precisely
8911 which registers we must use. */
8925 /* Structures 5 to 8 bytes in size are passed in the general
8926 registers in the same manner as other non floating-point
8927 objects. The data is right-justified and zero-extended
8930 This is magic. Normally, using a PARALLEL results in left
8931 justified data on a big-endian target. However, using a
8932 single double-word register provides the required right
8933 justification for 5 to 8 byte structures. This has nothing
8934 to do with the direction of padding specified for the argument.
8935 It has to do with how the data is widened and shifted into
8936 and from the register.
8938 Aside from adding load_multiple and store_multiple patterns,
8939 this is the only way that I have found to obtain right
8940 justification of BLKmode data when it has a size greater
8941 than one word. Splitting the operation into two SImode loads
8942 or returning a DImode REG results in left justified data. */
8943 if (mode == BLKmode)
8945 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8946 gen_rtx_REG (DImode, gpr_reg_base),
8948 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8953 /* We have a single word (32 bits). A simple computation
8954 will get us the register #s we need. */
/* 32-bit ABI: FP slots are 32 bits wide, hence the factor of 2.  */
8955 gpr_reg_base = 26 - cum->words;
8956 fpr_reg_base = 32 + 2 * cum->words;
8960 /* Determine if the argument needs to be passed in both general and
8961 floating point registers. */
8962 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8963 /* If we are doing soft-float with portable runtime, then there
8964 is no need to worry about FP regs. */
8965 && !TARGET_SOFT_FLOAT
8966 /* The parameter must be some kind of float, else we can just
8967 pass it in integer registers. */
8968 && FLOAT_MODE_P (mode)
8969 /* The target function must not have a prototype. */
8970 && cum->nargs_prototype <= 0
8971 /* libcalls do not need to pass items in both FP and general
8973 && type != NULL_TREE
8974 /* All this hair applies to "outgoing" args only. This includes
8975 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8977 /* Also pass outgoing floating arguments in both registers in indirect
8978 calls with the 32 bit ABI and the HP assembler since there is no
8979 way to the specify argument locations in static functions. */
8984 && FLOAT_MODE_P (mode)))
/* Pass in BOTH the FP and general register so either convention the
   callee uses will find the value.  */
8990 gen_rtx_EXPR_LIST (VOIDmode,
8991 gen_rtx_REG (mode, fpr_reg_base),
8993 gen_rtx_EXPR_LIST (VOIDmode,
8994 gen_rtx_REG (mode, gpr_reg_base),
8999 /* See if we should pass this parameter in a general register. */
9000 if (TARGET_SOFT_FLOAT
9001 /* Indirect calls in the normal 32bit ABI require all arguments
9002 to be passed in general registers. */
9003 || (!TARGET_PORTABLE_RUNTIME
9007 /* If the parameter is not a floating point parameter, then
9008 it belongs in GPRs. */
9009 || !FLOAT_MODE_P (mode))
9010 retval = gen_rtx_REG (mode, gpr_reg_base);
9012 retval = gen_rtx_REG (mode, fpr_reg_base);
9018 /* If this arg would be passed totally in registers or totally on the stack,
9019 then this routine should return zero. It is currently called only for
9020 the 64-bit target. */
/* Return the number of argument words passed in registers when the
   remainder of the argument is passed on the stack (PA64 only; the
   32-bit ABI never splits arguments).  Returns 0 when the argument is
   entirely in registers or entirely in memory.  */
9022 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9023 tree type, int named ATTRIBUTE_UNUSED)
9025 unsigned int max_arg_words = 8;
9026 unsigned int offset = 0;
/* Multiword arguments start on an even slot; an odd cum->words burns
   one padding word (the line setting offset is not visible here, but
   the test selects that case).  */
9028 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9031 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9032 /* Arg fits fully into registers. */
9034 else if (cum->words + offset >= max_arg_words)
9035 /* Arg fully on the stack. */
/* Otherwise the argument straddles the register/stack boundary: the
   words still available in registers are the partial count.  */
9039 return max_arg_words - cum->words - offset;
9043 /* Return 1 if this is a comparison operator. This allows the use of
9044 MATCH_OPERATOR to recognize all the branch insns. */
/* Predicate: nonzero if OP is a comparison code usable by the cmpib
   patterns.  NOTE(review): LTU and GEU are deliberately absent —
   presumably cmpib cannot encode those conditions; verify against the
   branch patterns in pa.md.  */
9047 cmpib_comparison_operator (rtx op, enum machine_mode mode)
9049 return ((mode == VOIDmode || GET_MODE (op) == mode)
9050 && (GET_CODE (op) == EQ
9051 || GET_CODE (op) == NE
9052 || GET_CODE (op) == GT
9053 || GET_CODE (op) == GTU
9054 || GET_CODE (op) == GE
9055 || GET_CODE (op) == LT
9056 || GET_CODE (op) == LE
9057 || GET_CODE (op) == LEU));
9060 /* On hpux10, the linker will give an error if we have a reference
9061 in the read-only data section to a symbol defined in a shared
9062 library. Therefore, expressions that might require a reloc can
9063 not be placed in the read-only data section. */
/* Select the output section for EXP.  Read-only variables with constant
   initializers, and constants, go to the read-only data section;
   presumably only when RELOC is clear (the condition lines 9075/9078
   and the fallback data_section call are missing from this listing) —
   see the hpux10 linker note above.  */
9066 pa_select_section (tree exp, int reloc,
9067 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9069 if (TREE_CODE (exp) == VAR_DECL
9070 && TREE_READONLY (exp)
9071 && !TREE_THIS_VOLATILE (exp)
9072 && DECL_INITIAL (exp)
/* error_mark_node means "initializer present but not yet parsed"; it is
   treated like a constant initializer here.  */
9073 && (DECL_INITIAL (exp) == error_mark_node
9074 || TREE_CONSTANT (DECL_INITIAL (exp)))
9076 readonly_data_section ();
/* TREE_CODE_CLASS 'c' is a constant node (e.g. STRING_CST).  */
9077 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
9079 readonly_data_section ();
/* Target hook: make NAME globally visible by emitting an
   ".EXPORT name,DATA" directive.  Function symbols are skipped —
   they are exported by ASM_DECLARE_FUNCTION_NAME instead.  */
9085 pa_globalize_label (FILE *stream, const char *name)
9087 /* We only handle DATA objects here, functions are globalized in
9088 ASM_DECLARE_FUNCTION_NAME. */
9089 if (! FUNCTION_NAME_P (name))
9091 fputs ("\t.EXPORT ", stream);
9092 assemble_name (stream, name);
9093 fputs (",DATA\n", stream);
9097 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
/* Return the register used to pass the address of a returned structure
   (the hidden struct-return pointer).  Both FNTYPE and INCOMING are
   ignored; the same fixed register is used in all cases.  */
9100 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9101 int incoming ATTRIBUTE_UNUSED)
9103 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9106 /* Worker function for TARGET_RETURN_IN_MEMORY. */
/* Target hook: nonzero if a value of TYPE must be returned in memory.
   The size cutoff is 8 bytes for the 32-bit SOM ABI and 16 bytes for
   PA64; variable-sized (-1) and empty (0) types also go in memory.  */
9109 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9111 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9112 PA64 ABI says that objects larger than 128 bits are returned in memory.
9113 Note, int_size_in_bytes can return -1 if the size of the object is
9114 variable or larger than the maximum value that can be expressed as
9115 a HOST_WIDE_INT. It can also return zero for an empty type. The
9116 simplest way to handle variable and empty types is to pass them in
9117 memory. This avoids problems in defining the boundaries of argument
9118 slots, allocating registers, etc. */
9119 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9120 || int_size_in_bytes (type) <= 0);