1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-attr.h"
40 #include "integrate.h"
48 #include "target-def.h"
51 /* Return nonzero if there is a bypass for the output of
52 OUT_INSN and the fp store IN_INSN. */
54 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
56 enum machine_mode store_mode;
57 enum machine_mode other_mode;
60 if (recog_memoized (in_insn) < 0
61 || (get_attr_type (in_insn) != TYPE_FPSTORE
62 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
63 || recog_memoized (out_insn) < 0)
66 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
68 set = single_set (out_insn);
72 other_mode = GET_MODE (SET_SRC (set));
74 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
78 #ifndef DO_FRAME_NOTES
79 #ifdef INCOMING_RETURN_ADDR_RTX
80 #define DO_FRAME_NOTES 1
82 #define DO_FRAME_NOTES 0
86 static void copy_reg_pointer (rtx, rtx);
87 static void fix_range (const char *);
88 static bool pa_handle_option (size_t, const char *, int);
89 static int hppa_address_cost (rtx);
90 static bool hppa_rtx_costs (rtx, int, int, int *);
91 static inline rtx force_mode (enum machine_mode, rtx);
92 static void pa_reorg (void);
93 static void pa_combine_instructions (void);
94 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
95 static int forward_branch_p (rtx);
96 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
97 static int compute_movmem_length (rtx);
98 static int compute_clrmem_length (rtx);
99 static bool pa_assemble_integer (rtx, unsigned int, int);
100 static void remove_useless_addtr_insns (int);
101 static void store_reg (int, HOST_WIDE_INT, int);
102 static void store_reg_modify (int, int, HOST_WIDE_INT);
103 static void load_reg (int, HOST_WIDE_INT, int);
104 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
105 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
106 static void update_total_code_bytes (int);
107 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
108 static int pa_adjust_cost (rtx, rtx, rtx, int);
109 static int pa_adjust_priority (rtx, int);
110 static int pa_issue_rate (void);
111 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
112 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
114 static void pa_encode_section_info (tree, rtx, int);
115 static const char *pa_strip_name_encoding (const char *);
116 static bool pa_function_ok_for_sibcall (tree, tree);
117 static void pa_globalize_label (FILE *, const char *)
119 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
120 HOST_WIDE_INT, tree);
121 #if !defined(USE_COLLECT2)
122 static void pa_asm_out_constructor (rtx, int);
123 static void pa_asm_out_destructor (rtx, int);
125 static void pa_init_builtins (void);
126 static rtx hppa_builtin_saveregs (void);
127 static void hppa_va_start (tree, rtx);
128 static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
129 static bool pa_scalar_mode_supported_p (enum machine_mode);
130 static bool pa_commutative_p (const_rtx x, int outer_code);
131 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
132 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
134 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
135 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
136 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
137 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
139 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
140 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
142 static void output_deferred_plabels (void);
143 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
144 #ifdef ASM_OUTPUT_EXTERNAL_REAL
145 static void pa_hpux_file_end (void);
147 #ifdef HPUX_LONG_DOUBLE_LIBRARY
148 static void pa_hpux_init_libfuncs (void);
150 static rtx pa_struct_value_rtx (tree, int);
151 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
153 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
155 static struct machine_function * pa_init_machine_status (void);
156 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
158 secondary_reload_info *);
159 static void pa_extra_live_on_entry (bitmap);
161 /* The following extra sections are only used for SOM. */
162 static GTY(()) section *som_readonly_data_section;
163 static GTY(()) section *som_one_only_readonly_data_section;
164 static GTY(()) section *som_one_only_data_section;
166 /* Save the operands last given to a compare for use when we
167 generate a scc or bcc insn. */
168 rtx hppa_compare_op0, hppa_compare_op1;
169 enum cmp_type hppa_branch_type;
171 /* Which cpu we are scheduling for. */
172 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
174 /* The UNIX standard to use for predefines and linking. */
175 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
177 /* Counts for the number of callee-saved general and floating point
178 registers which were saved by the current function's prologue. */
179 static int gr_saved, fr_saved;
181 /* Boolean indicating whether the return pointer was saved by the
182 current function's prologue. */
183 static bool rp_saved;
185 static rtx find_addr_reg (rtx);
187 /* Keep track of the number of bytes we have output in the CODE subspace
188 during this compilation so we'll know when to emit inline long-calls. */
189 unsigned long total_code_bytes;
191 /* The last address of the previous function plus the number of bytes in
192 associated thunks that have been output. This is used to determine if
193 a thunk can use an IA-relative branch to reach its target function. */
194 static int last_address;
196 /* Variables to handle plabels that we discover are necessary at assembly
197 output time. They are output after the current function. */
198 struct deferred_plabel GTY(())
203 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
205 static size_t n_deferred_plabels = 0;
208 /* Initialize the GCC target structure. */
210 #undef TARGET_ASM_ALIGNED_HI_OP
211 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
212 #undef TARGET_ASM_ALIGNED_SI_OP
213 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
214 #undef TARGET_ASM_ALIGNED_DI_OP
215 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
216 #undef TARGET_ASM_UNALIGNED_HI_OP
217 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
218 #undef TARGET_ASM_UNALIGNED_SI_OP
219 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
220 #undef TARGET_ASM_UNALIGNED_DI_OP
221 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
222 #undef TARGET_ASM_INTEGER
223 #define TARGET_ASM_INTEGER pa_assemble_integer
225 #undef TARGET_ASM_FUNCTION_PROLOGUE
226 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
227 #undef TARGET_ASM_FUNCTION_EPILOGUE
228 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
230 #undef TARGET_SCHED_ADJUST_COST
231 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
232 #undef TARGET_SCHED_ADJUST_PRIORITY
233 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
234 #undef TARGET_SCHED_ISSUE_RATE
235 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
237 #undef TARGET_ENCODE_SECTION_INFO
238 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
239 #undef TARGET_STRIP_NAME_ENCODING
240 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
242 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
243 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
245 #undef TARGET_COMMUTATIVE_P
246 #define TARGET_COMMUTATIVE_P pa_commutative_p
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
250 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
251 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
253 #undef TARGET_ASM_FILE_END
254 #ifdef ASM_OUTPUT_EXTERNAL_REAL
255 #define TARGET_ASM_FILE_END pa_hpux_file_end
257 #define TARGET_ASM_FILE_END output_deferred_plabels
260 #if !defined(USE_COLLECT2)
261 #undef TARGET_ASM_CONSTRUCTOR
262 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
263 #undef TARGET_ASM_DESTRUCTOR
264 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
267 #undef TARGET_DEFAULT_TARGET_FLAGS
268 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
269 #undef TARGET_HANDLE_OPTION
270 #define TARGET_HANDLE_OPTION pa_handle_option
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS pa_init_builtins
275 #undef TARGET_RTX_COSTS
276 #define TARGET_RTX_COSTS hppa_rtx_costs
277 #undef TARGET_ADDRESS_COST
278 #define TARGET_ADDRESS_COST hppa_address_cost
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
283 #ifdef HPUX_LONG_DOUBLE_LIBRARY
284 #undef TARGET_INIT_LIBFUNCS
285 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
288 #undef TARGET_PROMOTE_FUNCTION_RETURN
289 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
290 #undef TARGET_PROMOTE_PROTOTYPES
291 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
295 #undef TARGET_RETURN_IN_MEMORY
296 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
297 #undef TARGET_MUST_PASS_IN_STACK
298 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
299 #undef TARGET_PASS_BY_REFERENCE
300 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
301 #undef TARGET_CALLEE_COPIES
302 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
303 #undef TARGET_ARG_PARTIAL_BYTES
304 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
306 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
307 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
308 #undef TARGET_EXPAND_BUILTIN_VA_START
309 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
310 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
311 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
313 #undef TARGET_SCALAR_MODE_SUPPORTED_P
314 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
316 #undef TARGET_CANNOT_FORCE_CONST_MEM
317 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
319 #undef TARGET_SECONDARY_RELOAD
320 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
322 #undef TARGET_EXTRA_LIVE_ON_ENTRY
323 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
325 struct gcc_target targetm = TARGET_INITIALIZER;
327 /* Parse the -mfixed-range= option string. */
330 fix_range (const char *const_str)
333 char *str, *dash, *comma;
 335 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
336 REG2 are either register names or register numbers. The effect
337 of this option is to mark the registers in the range from REG1 to
338 REG2 as ``fixed'' so they won't be used by the compiler. This is
339 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
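  /* Illustrative usage (added example, not in the original sources): a
     command line such as

         -mfixed-range=fr20-fr23,fr30-fr31

     marks fr20..fr23 and fr30..fr31 as fixed, so the compiler never
     allocates them.  The exact register spellings accepted are whatever
     decode_reg_name understands for this target.  */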
341 i = strlen (const_str);
342 str = (char *) alloca (i + 1);
343 memcpy (str, const_str, i + 1);
347 dash = strchr (str, '-');
350 warning (0, "value of -mfixed-range must have form REG1-REG2");
355 comma = strchr (dash + 1, ',');
359 first = decode_reg_name (str);
362 warning (0, "unknown register name: %s", str);
366 last = decode_reg_name (dash + 1);
369 warning (0, "unknown register name: %s", dash + 1);
377 warning (0, "%s-%s is an empty range", str, dash + 1);
381 for (i = first; i <= last; ++i)
382 fixed_regs[i] = call_used_regs[i] = 1;
391 /* Check if all floating point registers have been fixed. */
392 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
397 target_flags |= MASK_DISABLE_FPREGS;
400 /* Implement TARGET_HANDLE_OPTION. */
403 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
408 case OPT_mpa_risc_1_0:
410 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
414 case OPT_mpa_risc_1_1:
416 target_flags &= ~MASK_PA_20;
417 target_flags |= MASK_PA_11;
420 case OPT_mpa_risc_2_0:
422 target_flags |= MASK_PA_11 | MASK_PA_20;
426 if (strcmp (arg, "8000") == 0)
427 pa_cpu = PROCESSOR_8000;
428 else if (strcmp (arg, "7100") == 0)
429 pa_cpu = PROCESSOR_7100;
430 else if (strcmp (arg, "700") == 0)
431 pa_cpu = PROCESSOR_700;
432 else if (strcmp (arg, "7100LC") == 0)
433 pa_cpu = PROCESSOR_7100LC;
434 else if (strcmp (arg, "7200") == 0)
435 pa_cpu = PROCESSOR_7200;
436 else if (strcmp (arg, "7300") == 0)
437 pa_cpu = PROCESSOR_7300;
442 case OPT_mfixed_range_:
452 #if TARGET_HPUX_10_10
458 #if TARGET_HPUX_11_11
470 override_options (void)
472 /* Unconditional branches in the delay slot are not compatible with dwarf2
473 call frame information. There is no benefit in using this optimization
474 on PA8000 and later processors. */
475 if (pa_cpu >= PROCESSOR_8000
476 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
477 || flag_unwind_tables)
478 target_flags &= ~MASK_JUMP_IN_DELAY;
480 if (flag_pic && TARGET_PORTABLE_RUNTIME)
482 warning (0, "PIC code generation is not supported in the portable runtime model");
485 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
487 warning (0, "PIC code generation is not compatible with fast indirect calls");
490 if (! TARGET_GAS && write_symbols != NO_DEBUG)
492 warning (0, "-g is only supported when using GAS on this processor,");
493 warning (0, "-g option disabled");
494 write_symbols = NO_DEBUG;
497 /* We only support the "big PIC" model now. And we always generate PIC
498 code when in 64bit mode. */
499 if (flag_pic == 1 || TARGET_64BIT)
502 /* We can't guarantee that .dword is available for 32-bit targets. */
503 if (UNITS_PER_WORD == 4)
504 targetm.asm_out.aligned_op.di = NULL;
506 /* The unaligned ops are only available when using GAS. */
509 targetm.asm_out.unaligned_op.hi = NULL;
510 targetm.asm_out.unaligned_op.si = NULL;
511 targetm.asm_out.unaligned_op.di = NULL;
514 init_machine_status = pa_init_machine_status;
518 pa_init_builtins (void)
520 #ifdef DONT_HAVE_FPUTC_UNLOCKED
521 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
522 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
523 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
524 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
527 if (built_in_decls [BUILT_IN_FINITE])
528 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
529 if (built_in_decls [BUILT_IN_FINITEF])
530 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
534 /* Function to init struct machine_function.
535 This will be called, via a pointer variable,
536 from push_function_context. */
538 static struct machine_function *
539 pa_init_machine_status (void)
541 return ggc_alloc_cleared (sizeof (machine_function));
544 /* If FROM is a probable pointer register, mark TO as a probable
545 pointer register with the same pointer alignment as FROM. */
548 copy_reg_pointer (rtx to, rtx from)
550 if (REG_POINTER (from))
551 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
554 /* Return 1 if X contains a symbolic expression. We know these
555 expressions will have one of a few well defined forms, so
556 we need only check those forms. */
558 symbolic_expression_p (rtx x)
561 /* Strip off any HIGH. */
562 if (GET_CODE (x) == HIGH)
565 return (symbolic_operand (x, VOIDmode));
568 /* Accept any constant that can be moved in one instruction into a
571 cint_ok_for_move (HOST_WIDE_INT ival)
573 /* OK if ldo, ldil, or zdepi, can be used. */
574 return (VAL_14_BITS_P (ival)
575 || ldil_cint_p (ival)
576 || zdepi_cint_p (ival));
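  /* Added illustration (not in the original): 4660 (0x1234) satisfies
     VAL_14_BITS_P, so a single ldo/ldi moves it; the ldil and zdepi
     cases are illustrated after ldil_cint_p and zdepi_cint_p below.  */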
579 /* Return truth value of whether OP can be used as an operand in a
582 adddi3_operand (rtx op, enum machine_mode mode)
584 return (register_operand (op, mode)
585 || (GET_CODE (op) == CONST_INT
586 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
589 /* True iff the operand OP can be used as the destination operand of
590 an integer store. This also implies the operand could be used as
591 the source operand of an integer load. Symbolic, lo_sum and indexed
592 memory operands are not allowed. We accept reloading pseudos and
593 other memory operands. */
595 integer_store_memory_operand (rtx op, enum machine_mode mode)
597 return ((reload_in_progress
599 && REGNO (op) >= FIRST_PSEUDO_REGISTER
600 && reg_renumber [REGNO (op)] < 0)
601 || (GET_CODE (op) == MEM
602 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
603 && !symbolic_memory_operand (op, VOIDmode)
604 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
605 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
608 /* True iff ldil can be used to load this CONST_INT. The least
609 significant 11 bits of the value must be zero and the value must
610 not change sign when extended from 32 to 64 bits. */
612 ldil_cint_p (HOST_WIDE_INT ival)
614 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
616 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
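/* Added example (not in the original): the mask above keeps only bits
   31 and up plus the low 11 bits.  For IVAL = 0x12345800 all of those
   bits are zero, so x == 0 and ldil alone loads the value.  0x80000000
   is rejected: its low 11 bits are zero, but it changes sign when
   widened from 32 to 64 bits, so x is neither 0 nor the all-ones
   high part.  */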
619 /* True iff zdepi can be used to generate this CONST_INT.
620 zdepi first sign extends a 5-bit signed number to a given field
621 length, then places this field anywhere in a zero. */
623 zdepi_cint_p (unsigned HOST_WIDE_INT x)
625 unsigned HOST_WIDE_INT lsb_mask, t;
627 /* This might not be obvious, but it's at least fast.
628 This function is critical; we don't have the time loops would take. */
630 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
631 /* Return true iff t is a power of two. */
632 return ((t & (t - 1)) == 0);
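/* Added walk-through (not in the original): for X = 0x0007f000, taking
   lsb_mask to be the least significant set bit of X (0x1000 here),
   t = ((0x0007f000 >> 4) + 0x1000) & ~0xfff = 0x8000, a power of two,
   so the function returns true; the value is a 7-bit run of ones that
   zdepi can deposit in one instruction.  */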
635 /* True iff depi or extru can be used to compute (reg & mask).
636 Accept bit pattern like these:
641 and_mask_p (unsigned HOST_WIDE_INT mask)
644 mask += mask & -mask;
645 return (mask & (mask - 1)) == 0;
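/* Added example (not in the original): MASK = 0x3f gives
   mask & -mask = 1, and 0x3f + 1 = 0x40 is a power of two, so the test
   succeeds -- (reg & 0x3f) is a simple bit-field extract.  For
   MASK = 0x50 the sum is 0x60, which is not a power of two, so the
   mask is rejected.  ior_mask_p below applies the same trick.  */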
648 /* True iff depi can be used to compute (reg | MASK). */
650 ior_mask_p (unsigned HOST_WIDE_INT mask)
652 mask += mask & -mask;
653 return (mask & (mask - 1)) == 0;
656 /* Legitimize PIC addresses. If the address is already
657 position-independent, we return ORIG. Newly generated
658 position-independent addresses go to REG. If we need more
659 than one register, we lose. */
662 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
666 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
668 /* Labels need special handling. */
669 if (pic_label_operand (orig, mode))
671 /* We do not want to go through the movXX expanders here since that
672 would create recursion.
674 Nor do we really want to call a generator for a named pattern
675 since that requires multiple patterns if we want to support
678 So instead we just emit the raw set, which avoids the movXX
679 expanders completely. */
680 mark_reg_pointer (reg, BITS_PER_UNIT);
681 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
682 current_function_uses_pic_offset_table = 1;
685 if (GET_CODE (orig) == SYMBOL_REF)
691 /* Before reload, allocate a temporary register for the intermediate
692 result. This allows the sequence to be deleted when the final
693 result is unused and the insns are trivially dead. */
694 tmp_reg = ((reload_in_progress || reload_completed)
695 ? reg : gen_reg_rtx (Pmode));
697 emit_move_insn (tmp_reg,
698 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
699 gen_rtx_HIGH (word_mode, orig)));
701 = gen_const_mem (Pmode,
702 gen_rtx_LO_SUM (Pmode, tmp_reg,
703 gen_rtx_UNSPEC (Pmode,
707 current_function_uses_pic_offset_table = 1;
708 mark_reg_pointer (reg, BITS_PER_UNIT);
709 insn = emit_move_insn (reg, pic_ref);
711 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
712 set_unique_reg_note (insn, REG_EQUAL, orig);
716 else if (GET_CODE (orig) == CONST)
720 if (GET_CODE (XEXP (orig, 0)) == PLUS
721 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
725 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
727 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
728 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
729 base == reg ? 0 : reg);
731 if (GET_CODE (orig) == CONST_INT)
733 if (INT_14_BITS (orig))
734 return plus_constant (base, INTVAL (orig));
735 orig = force_reg (Pmode, orig);
737 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
738 /* Likewise, should we set special REG_NOTEs here? */
744 static GTY(()) rtx gen_tls_tga;
747 gen_tls_get_addr (void)
750 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
755 hppa_tls_call (rtx arg)
759 ret = gen_reg_rtx (Pmode);
760 emit_library_call_value (gen_tls_get_addr (), ret,
761 LCT_CONST, Pmode, 1, arg, Pmode);
767 legitimize_tls_address (rtx addr)
769 rtx ret, insn, tmp, t1, t2, tp;
770 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
774 case TLS_MODEL_GLOBAL_DYNAMIC:
775 tmp = gen_reg_rtx (Pmode);
777 emit_insn (gen_tgd_load_pic (tmp, addr));
779 emit_insn (gen_tgd_load (tmp, addr));
780 ret = hppa_tls_call (tmp);
783 case TLS_MODEL_LOCAL_DYNAMIC:
784 ret = gen_reg_rtx (Pmode);
785 tmp = gen_reg_rtx (Pmode);
788 emit_insn (gen_tld_load_pic (tmp, addr));
790 emit_insn (gen_tld_load (tmp, addr));
791 t1 = hppa_tls_call (tmp);
794 t2 = gen_reg_rtx (Pmode);
795 emit_libcall_block (insn, t2, t1,
796 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
798 emit_insn (gen_tld_offset_load (ret, addr, t2));
801 case TLS_MODEL_INITIAL_EXEC:
802 tp = gen_reg_rtx (Pmode);
803 tmp = gen_reg_rtx (Pmode);
804 ret = gen_reg_rtx (Pmode);
805 emit_insn (gen_tp_load (tp));
807 emit_insn (gen_tie_load_pic (tmp, addr));
809 emit_insn (gen_tie_load (tmp, addr));
810 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
813 case TLS_MODEL_LOCAL_EXEC:
814 tp = gen_reg_rtx (Pmode);
815 ret = gen_reg_rtx (Pmode);
816 emit_insn (gen_tp_load (tp));
817 emit_insn (gen_tle_load (ret, addr, tp));
827 /* Try machine-dependent ways of modifying an illegitimate address
828 to be legitimate. If we find one, return the new, valid address.
829 This macro is used in only one place: `memory_address' in explow.c.
831 OLDX is the address as it was before break_out_memory_refs was called.
832 In some cases it is useful to look at this to decide what needs to be done.
834 MODE and WIN are passed so that this macro can use
835 GO_IF_LEGITIMATE_ADDRESS.
837 It is always safe for this macro to do nothing. It exists to recognize
838 opportunities to optimize the output.
840 For the PA, transform:
842 memory(X + <large int>)
 846 if (<large int> & mask) >= (mask + 1) / 2
847 Y = (<large int> & ~mask) + mask + 1 Round up.
849 Y = (<large int> & ~mask) Round down.
851 memory (Z + (<large int> - Y));
853 This is for CSE to find several similar references, and only use one Z.
855 X can either be a SYMBOL_REF or REG, but because combine cannot
856 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
857 D will not fit in 14 bits.
859 MODE_FLOAT references allow displacements which fit in 5 bits, so use
862 MODE_INT references allow displacements which fit in 14 bits, so use
865 This relies on the fact that most mode MODE_FLOAT references will use FP
866 registers and most mode MODE_INT references will use integer registers.
867 (In the rare case of an FP register used in an integer MODE, we depend
868 on secondary reloads to clean things up.)
871 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
872 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
873 addressing modes to be used).
875 Put X and Z into registers. Then put the entire expression into
879 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
880 enum machine_mode mode)
884 /* We need to canonicalize the order of operands in unscaled indexed
885 addresses since the code that checks if an address is valid doesn't
886 always try both orders. */
887 if (!TARGET_NO_SPACE_REGS
888 && GET_CODE (x) == PLUS
889 && GET_MODE (x) == Pmode
890 && REG_P (XEXP (x, 0))
891 && REG_P (XEXP (x, 1))
892 && REG_POINTER (XEXP (x, 0))
893 && !REG_POINTER (XEXP (x, 1)))
894 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
896 if (PA_SYMBOL_REF_TLS_P (x))
897 return legitimize_tls_address (x);
899 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
901 /* Strip off CONST. */
902 if (GET_CODE (x) == CONST)
905 /* Special case. Get the SYMBOL_REF into a register and use indexing.
906 That should always be safe. */
907 if (GET_CODE (x) == PLUS
908 && GET_CODE (XEXP (x, 0)) == REG
909 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
911 rtx reg = force_reg (Pmode, XEXP (x, 1));
912 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
915 /* Note we must reject symbols which represent function addresses
916 since the assembler/linker can't handle arithmetic on plabels. */
917 if (GET_CODE (x) == PLUS
918 && GET_CODE (XEXP (x, 1)) == CONST_INT
919 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
920 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
921 || GET_CODE (XEXP (x, 0)) == REG))
923 rtx int_part, ptr_reg;
925 int offset = INTVAL (XEXP (x, 1));
928 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
929 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
931 /* Choose which way to round the offset. Round up if we
932 are >= halfway to the next boundary. */
933 if ((offset & mask) >= ((mask + 1) / 2))
934 newoffset = (offset & ~ mask) + mask + 1;
936 newoffset = (offset & ~ mask);
938 /* If the newoffset will not fit in 14 bits (ldo), then
939 handling this would take 4 or 5 instructions (2 to load
940 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
941 add the new offset and the SYMBOL_REF.) Combine can
942 not handle 4->2 or 5->2 combinations, so do not create
944 if (! VAL_14_BITS_P (newoffset)
945 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
947 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
950 gen_rtx_HIGH (Pmode, const_part));
953 gen_rtx_LO_SUM (Pmode,
954 tmp_reg, const_part));
958 if (! VAL_14_BITS_P (newoffset))
959 int_part = force_reg (Pmode, GEN_INT (newoffset));
961 int_part = GEN_INT (newoffset);
963 ptr_reg = force_reg (Pmode,
965 force_reg (Pmode, XEXP (x, 0)),
968 return plus_constant (ptr_reg, offset - newoffset);
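      /* Worked example (added, not in the original): for an SImode
	 reference to X + 100000 the mask is 0x3fff; 100000 & 0x3fff
	 = 1696, which is below half the boundary, so newoffset = 98304.
	 X + 98304 goes into ptr_reg and the reference becomes
	 ptr_reg + 1696, a displacement that fits in 14 bits; nearby
	 references to X can then share ptr_reg via CSE.  */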
971 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
973 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
974 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
975 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
976 && (OBJECT_P (XEXP (x, 1))
977 || GET_CODE (XEXP (x, 1)) == SUBREG)
978 && GET_CODE (XEXP (x, 1)) != CONST)
980 int val = INTVAL (XEXP (XEXP (x, 0), 1));
984 if (GET_CODE (reg1) != REG)
985 reg1 = force_reg (Pmode, force_operand (reg1, 0));
987 reg2 = XEXP (XEXP (x, 0), 0);
988 if (GET_CODE (reg2) != REG)
989 reg2 = force_reg (Pmode, force_operand (reg2, 0));
991 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
998 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1000 Only do so for floating point modes since this is more speculative
1001 and we lose if it's an integer store. */
1002 if (GET_CODE (x) == PLUS
1003 && GET_CODE (XEXP (x, 0)) == PLUS
1004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1005 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1006 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1007 && (mode == SFmode || mode == DFmode))
1010 /* First, try and figure out what to use as a base register. */
1011 rtx reg1, reg2, base, idx, orig_base;
1013 reg1 = XEXP (XEXP (x, 0), 1);
1018 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1019 then emit_move_sequence will turn on REG_POINTER so we'll know
1020 it's a base register below. */
1021 if (GET_CODE (reg1) != REG)
1022 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1024 if (GET_CODE (reg2) != REG)
1025 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1027 /* Figure out what the base and index are. */
1029 if (GET_CODE (reg1) == REG
1030 && REG_POINTER (reg1))
1033 orig_base = XEXP (XEXP (x, 0), 1);
1034 idx = gen_rtx_PLUS (Pmode,
1035 gen_rtx_MULT (Pmode,
1036 XEXP (XEXP (XEXP (x, 0), 0), 0),
1037 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1040 else if (GET_CODE (reg2) == REG
1041 && REG_POINTER (reg2))
1044 orig_base = XEXP (x, 1);
1051 /* If the index adds a large constant, try to scale the
1052 constant so that it can be loaded with only one insn. */
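      /* Added illustration (not in the original): with a scale factor
	 of 8, a constant term of 32760 does not fit in 14 bits, but
	 32760 / 8 = 4095 does, so the quotient is added to the index
	 register and the address still uses one scaled-index form; see
	 the code below.  */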
1053 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1054 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1055 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1056 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1058 /* Divide the CONST_INT by the scale factor, then add it to A. */
1059 int val = INTVAL (XEXP (idx, 1));
1061 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1062 reg1 = XEXP (XEXP (idx, 0), 0);
1063 if (GET_CODE (reg1) != REG)
1064 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1066 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1068 /* We can now generate a simple scaled indexed address. */
1071 (Pmode, gen_rtx_PLUS (Pmode,
1072 gen_rtx_MULT (Pmode, reg1,
1073 XEXP (XEXP (idx, 0), 1)),
1077 /* If B + C is still a valid base register, then add them. */
1078 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1079 && INTVAL (XEXP (idx, 1)) <= 4096
1080 && INTVAL (XEXP (idx, 1)) >= -4096)
1082 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1085 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1087 reg2 = XEXP (XEXP (idx, 0), 0);
1088 if (GET_CODE (reg2) != CONST_INT)
1089 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1091 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1092 gen_rtx_MULT (Pmode,
1098 /* Get the index into a register, then add the base + index and
1099 return a register holding the result. */
1101 /* First get A into a register. */
1102 reg1 = XEXP (XEXP (idx, 0), 0);
1103 if (GET_CODE (reg1) != REG)
1104 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1106 /* And get B into a register. */
1107 reg2 = XEXP (idx, 1);
1108 if (GET_CODE (reg2) != REG)
1109 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1111 reg1 = force_reg (Pmode,
1112 gen_rtx_PLUS (Pmode,
1113 gen_rtx_MULT (Pmode, reg1,
1114 XEXP (XEXP (idx, 0), 1)),
1117 /* Add the result to our base register and return. */
1118 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1122 /* Uh-oh. We might have an address for x[n-100000]. This needs
1123 special handling to avoid creating an indexed memory address
1124 with x-100000 as the base.
1126 If the constant part is small enough, then it's still safe because
1127 there is a guard page at the beginning and end of the data segment.
1129 Scaled references are common enough that we want to try and rearrange the
1130 terms so that we can use indexing for these addresses too. Only
 1131 do the optimization for floating point modes. */
1133 if (GET_CODE (x) == PLUS
1134 && symbolic_expression_p (XEXP (x, 1)))
1136 /* Ugly. We modify things here so that the address offset specified
1137 by the index expression is computed first, then added to x to form
1138 the entire address. */
1140 rtx regx1, regx2, regy1, regy2, y;
1142 /* Strip off any CONST. */
1144 if (GET_CODE (y) == CONST)
1147 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1149 /* See if this looks like
1150 (plus (mult (reg) (shadd_const))
1151 (const (plus (symbol_ref) (const_int))))
1153 Where const_int is small. In that case the const
1154 expression is a valid pointer for indexing.
 1156 If const_int is big but divides evenly by shadd_const, the quotient
 1157 can be added to (reg) instead. This allows more scaled indexed addresses.
1158 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1159 && GET_CODE (XEXP (x, 0)) == MULT
1160 && GET_CODE (XEXP (y, 1)) == CONST_INT
1161 && INTVAL (XEXP (y, 1)) >= -4096
1162 && INTVAL (XEXP (y, 1)) <= 4095
1163 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1164 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1166 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1170 if (GET_CODE (reg1) != REG)
1171 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1173 reg2 = XEXP (XEXP (x, 0), 0);
1174 if (GET_CODE (reg2) != REG)
1175 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1177 return force_reg (Pmode,
1178 gen_rtx_PLUS (Pmode,
1179 gen_rtx_MULT (Pmode,
1184 else if ((mode == DFmode || mode == SFmode)
1185 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1186 && GET_CODE (XEXP (x, 0)) == MULT
1187 && GET_CODE (XEXP (y, 1)) == CONST_INT
1188 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1189 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1190 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1193 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1194 / INTVAL (XEXP (XEXP (x, 0), 1))));
1195 regx2 = XEXP (XEXP (x, 0), 0);
1196 if (GET_CODE (regx2) != REG)
1197 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1198 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1202 gen_rtx_PLUS (Pmode,
1203 gen_rtx_MULT (Pmode, regx2,
1204 XEXP (XEXP (x, 0), 1)),
1205 force_reg (Pmode, XEXP (y, 0))));
1207 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1208 && INTVAL (XEXP (y, 1)) >= -4096
1209 && INTVAL (XEXP (y, 1)) <= 4095)
1211 /* This is safe because of the guard page at the
1212 beginning and end of the data space. Just
1213 return the original address. */
1218 /* Doesn't look like one we can optimize. */
1219 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1220 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1221 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1222 regx1 = force_reg (Pmode,
1223 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1225 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
 1233 /* For the HPPA, REG and REG+CONST are cost 0
1234 and addresses involving symbolic constants are cost 2.
1236 PIC addresses are very expensive.
1238 It is no coincidence that this has the same structure
1239 as GO_IF_LEGITIMATE_ADDRESS. */
1242 hppa_address_cost (rtx X)
1244 switch (GET_CODE (X))
1257 /* Compute a (partial) cost for rtx X. Return true if the complete
1258 cost has been computed, and false if subexpressions should be
1259 scanned. In either case, *TOTAL contains the cost result. */
1262 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1267 if (INTVAL (x) == 0)
1269 else if (INT_14_BITS (x))
1286 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1287 && outer_code != SET)
1294 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1295 *total = COSTS_N_INSNS (3);
1296 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1297 *total = COSTS_N_INSNS (8);
1299 *total = COSTS_N_INSNS (20);
1303 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1305 *total = COSTS_N_INSNS (14);
1313 *total = COSTS_N_INSNS (60);
1316 case PLUS: /* this includes shNadd insns */
1318 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1319 *total = COSTS_N_INSNS (3);
1321 *total = COSTS_N_INSNS (1);
1327 *total = COSTS_N_INSNS (1);
1335 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1336 new rtx with the correct mode. */
1338 force_mode (enum machine_mode mode, rtx orig)
1340 if (mode == GET_MODE (orig))
1343 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1345 return gen_rtx_REG (mode, REGNO (orig));
1348 /* Return 1 if *X is a thread-local symbol. */
1351 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1353 return PA_SYMBOL_REF_TLS_P (*x);
1356 /* Return 1 if X contains a thread-local symbol. */
1359 pa_tls_referenced_p (rtx x)
1361 if (!TARGET_HAVE_TLS)
1364 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1367 /* Emit insns to move operands[1] into operands[0].
1369 Return 1 if we have written out everything that needs to be done to
1370 do the move. Otherwise, return 0 and the caller will emit the move
1373 Note SCRATCH_REG may not be in the proper mode depending on how it
1374 will be used. This routine is responsible for creating a new copy
1375 of SCRATCH_REG in the proper mode. */
1378 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1380 register rtx operand0 = operands[0];
1381 register rtx operand1 = operands[1];
1384 /* We can only handle indexed addresses in the destination operand
1385 of floating point stores. Thus, we need to break out indexed
1386 addresses from the destination operand. */
1387 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1389 gcc_assert (can_create_pseudo_p ());
1391 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1392 operand0 = replace_equiv_address (operand0, tem);
1395 /* On targets with non-equivalent space registers, break out unscaled
1396 indexed addresses from the source operand before the final CSE.
1397 We have to do this because the REG_POINTER flag is not correctly
1398 carried through various optimization passes and CSE may substitute
1399 a pseudo without the pointer set for one with the pointer set. As
 1400 a result, we lose various opportunities to create insns with
1401 unscaled indexed addresses. */
1402 if (!TARGET_NO_SPACE_REGS
1403 && !cse_not_expected
1404 && GET_CODE (operand1) == MEM
1405 && GET_CODE (XEXP (operand1, 0)) == PLUS
1406 && REG_P (XEXP (XEXP (operand1, 0), 0))
1407 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1409 = replace_equiv_address (operand1,
1410 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1413 && reload_in_progress && GET_CODE (operand0) == REG
1414 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1415 operand0 = reg_equiv_mem[REGNO (operand0)];
1416 else if (scratch_reg
1417 && reload_in_progress && GET_CODE (operand0) == SUBREG
1418 && GET_CODE (SUBREG_REG (operand0)) == REG
1419 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1421 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1422 the code which tracks sets/uses for delete_output_reload. */
1423 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1424 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1425 SUBREG_BYTE (operand0));
1426 operand0 = alter_subreg (&temp);
1430 && reload_in_progress && GET_CODE (operand1) == REG
1431 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1432 operand1 = reg_equiv_mem[REGNO (operand1)];
1433 else if (scratch_reg
1434 && reload_in_progress && GET_CODE (operand1) == SUBREG
1435 && GET_CODE (SUBREG_REG (operand1)) == REG
1436 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
 1438 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1439 the code which tracks sets/uses for delete_output_reload. */
1440 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1441 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1442 SUBREG_BYTE (operand1));
1443 operand1 = alter_subreg (&temp);
1446 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1447 && ((tem = find_replacement (&XEXP (operand0, 0)))
1448 != XEXP (operand0, 0)))
1449 operand0 = replace_equiv_address (operand0, tem);
1451 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1452 && ((tem = find_replacement (&XEXP (operand1, 0)))
1453 != XEXP (operand1, 0)))
1454 operand1 = replace_equiv_address (operand1, tem);
1456 /* Handle secondary reloads for loads/stores of FP registers from
1457 REG+D addresses where D does not fit in 5 or 14 bits, including
1458 (subreg (mem (addr))) cases. */
1460 && fp_reg_operand (operand0, mode)
1461 && ((GET_CODE (operand1) == MEM
1462 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1463 XEXP (operand1, 0)))
1464 || ((GET_CODE (operand1) == SUBREG
1465 && GET_CODE (XEXP (operand1, 0)) == MEM
1466 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1468 XEXP (XEXP (operand1, 0), 0))))))
1470 if (GET_CODE (operand1) == SUBREG)
1471 operand1 = XEXP (operand1, 0);
1473 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1474 it in WORD_MODE regardless of what mode it was originally given
1476 scratch_reg = force_mode (word_mode, scratch_reg);
1478 /* D might not fit in 14 bits either; for such cases load D into
1480 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1482 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1483 emit_move_insn (scratch_reg,
1484 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1486 XEXP (XEXP (operand1, 0), 0),
1490 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1491 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1492 replace_equiv_address (operand1, scratch_reg)));
1495 else if (scratch_reg
1496 && fp_reg_operand (operand1, mode)
1497 && ((GET_CODE (operand0) == MEM
1498 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1500 XEXP (operand0, 0)))
1501 || ((GET_CODE (operand0) == SUBREG)
1502 && GET_CODE (XEXP (operand0, 0)) == MEM
1503 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1505 XEXP (XEXP (operand0, 0), 0)))))
1507 if (GET_CODE (operand0) == SUBREG)
1508 operand0 = XEXP (operand0, 0);
1510 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1511 it in WORD_MODE regardless of what mode it was originally given
1513 scratch_reg = force_mode (word_mode, scratch_reg);
1515 /* D might not fit in 14 bits either; for such cases load D into
1517 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1519 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1520 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1523 XEXP (XEXP (operand0, 0),
1528 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1529 emit_insn (gen_rtx_SET (VOIDmode,
1530 replace_equiv_address (operand0, scratch_reg),
1534 /* Handle secondary reloads for loads of FP registers from constant
1535 expressions by forcing the constant into memory.
1537 Use scratch_reg to hold the address of the memory location.
1539 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1540 NO_REGS when presented with a const_int and a register class
1541 containing only FP registers. Doing so unfortunately creates
1542 more problems than it solves. Fix this for 2.5. */
1543 else if (scratch_reg
1544 && CONSTANT_P (operand1)
1545 && fp_reg_operand (operand0, mode))
1547 rtx const_mem, xoperands[2];
1549 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1550 it in WORD_MODE regardless of what mode it was originally given
1552 scratch_reg = force_mode (word_mode, scratch_reg);
1554 /* Force the constant into memory and put the address of the
1555 memory location into scratch_reg. */
1556 const_mem = force_const_mem (mode, operand1);
1557 xoperands[0] = scratch_reg;
1558 xoperands[1] = XEXP (const_mem, 0);
1559 emit_move_sequence (xoperands, Pmode, 0);
1561 /* Now load the destination register. */
1562 emit_insn (gen_rtx_SET (mode, operand0,
1563 replace_equiv_address (const_mem, scratch_reg)));
1566 /* Handle secondary reloads for SAR. These occur when trying to load
1567 the SAR from memory, FP register, or with a constant. */
1568 else if (scratch_reg
1569 && GET_CODE (operand0) == REG
1570 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1571 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1572 && (GET_CODE (operand1) == MEM
1573 || GET_CODE (operand1) == CONST_INT
1574 || (GET_CODE (operand1) == REG
1575 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1577 /* D might not fit in 14 bits either; for such cases load D into
1579 if (GET_CODE (operand1) == MEM
1580 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1582 /* We are reloading the address into the scratch register, so we
1583 want to make sure the scratch register is a full register. */
1584 scratch_reg = force_mode (word_mode, scratch_reg);
1586 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1587 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1590 XEXP (XEXP (operand1, 0),
1594 /* Now we are going to load the scratch register from memory,
1595 we want to load it in the same width as the original MEM,
1596 which must be the same as the width of the ultimate destination,
1598 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1600 emit_move_insn (scratch_reg,
1601 replace_equiv_address (operand1, scratch_reg));
1605 /* We want to load the scratch register using the same mode as
1606 the ultimate destination. */
1607 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1609 emit_move_insn (scratch_reg, operand1);
1612 /* And emit the insn to set the ultimate destination. We know that
1613 the scratch register has the same mode as the destination at this
1615 emit_move_insn (operand0, scratch_reg);
1618 /* Handle the most common case: storing into a register. */
1619 else if (register_operand (operand0, mode))
1621 if (register_operand (operand1, mode)
1622 || (GET_CODE (operand1) == CONST_INT
1623 && cint_ok_for_move (INTVAL (operand1)))
1624 || (operand1 == CONST0_RTX (mode))
1625 || (GET_CODE (operand1) == HIGH
1626 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1627 /* Only `general_operands' can come here, so MEM is ok. */
1628 || GET_CODE (operand1) == MEM)
1630 /* Various sets are created during RTL generation which don't
1631 have the REG_POINTER flag correctly set. After the CSE pass,
1632 instruction recognition can fail if we don't consistently
1633 set this flag when performing register copies. This should
1634 also improve the opportunities for creating insns that use
1635 unscaled indexing. */
1636 if (REG_P (operand0) && REG_P (operand1))
1638 if (REG_POINTER (operand1)
1639 && !REG_POINTER (operand0)
1640 && !HARD_REGISTER_P (operand0))
1641 copy_reg_pointer (operand0, operand1);
1642 else if (REG_POINTER (operand0)
1643 && !REG_POINTER (operand1)
1644 && !HARD_REGISTER_P (operand1))
1645 copy_reg_pointer (operand1, operand0);
1648 /* When MEMs are broken out, the REG_POINTER flag doesn't
1649 get set. In some cases, we can set the REG_POINTER flag
1650 from the declaration for the MEM. */
1651 if (REG_P (operand0)
1652 && GET_CODE (operand1) == MEM
1653 && !REG_POINTER (operand0))
1655 tree decl = MEM_EXPR (operand1);
1657 /* Set the register pointer flag and register alignment
1658 if the declaration for this memory reference is a
1659 pointer type. Fortran indirect argument references
1662 && !(flag_argument_noalias > 1
1663 && TREE_CODE (decl) == INDIRECT_REF
1664 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1668 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1670 if (TREE_CODE (decl) == COMPONENT_REF)
1671 decl = TREE_OPERAND (decl, 1);
1673 type = TREE_TYPE (decl);
1674 if (TREE_CODE (type) == ARRAY_TYPE)
1675 type = get_inner_array_type (type);
1677 if (POINTER_TYPE_P (type))
1681 type = TREE_TYPE (type);
1682 /* Using TYPE_ALIGN_OK is rather conservative as
1683 only the ada frontend actually sets it. */
1684 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1686 mark_reg_pointer (operand0, align);
1691 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1695 else if (GET_CODE (operand0) == MEM)
1697 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1698 && !(reload_in_progress || reload_completed))
1700 rtx temp = gen_reg_rtx (DFmode);
1702 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1703 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1706 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1708 /* Run this case quickly. */
1709 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1712 if (! (reload_in_progress || reload_completed))
1714 operands[0] = validize_mem (operand0);
1715 operands[1] = operand1 = force_reg (mode, operand1);
1719 /* Simplify the source if we need to.
1720 Note we do have to handle function labels here, even though we do
1721 not consider them legitimate constants. Loop optimizations can
1722 call the emit_move_xxx with one as a source. */
1723 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1724 || function_label_operand (operand1, mode)
1725 || (GET_CODE (operand1) == HIGH
1726 && symbolic_operand (XEXP (operand1, 0), mode)))
1730 if (GET_CODE (operand1) == HIGH)
1733 operand1 = XEXP (operand1, 0);
1735 if (symbolic_operand (operand1, mode))
1737 /* Argh. The assembler and linker can't handle arithmetic
1740 So we force the plabel into memory, load operand0 from
1741 the memory location, then add in the constant part. */
1742 if ((GET_CODE (operand1) == CONST
1743 && GET_CODE (XEXP (operand1, 0)) == PLUS
1744 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1745 || function_label_operand (operand1, mode))
1747 rtx temp, const_part;
1749 /* Figure out what (if any) scratch register to use. */
1750 if (reload_in_progress || reload_completed)
1752 scratch_reg = scratch_reg ? scratch_reg : operand0;
1753 /* SCRATCH_REG will hold an address and maybe the actual
1754 data. We want it in WORD_MODE regardless of what mode it
1755 was originally given to us. */
1756 scratch_reg = force_mode (word_mode, scratch_reg);
1759 scratch_reg = gen_reg_rtx (Pmode);
1761 if (GET_CODE (operand1) == CONST)
1763 /* Save away the constant part of the expression. */
1764 const_part = XEXP (XEXP (operand1, 0), 1);
1765 gcc_assert (GET_CODE (const_part) == CONST_INT);
1767 /* Force the function label into memory. */
1768 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1772 /* No constant part. */
1773 const_part = NULL_RTX;
1775 /* Force the function label into memory. */
1776 temp = force_const_mem (mode, operand1);
1780 /* Get the address of the memory location. PIC-ify it if
1782 temp = XEXP (temp, 0);
1784 temp = legitimize_pic_address (temp, mode, scratch_reg);
1786 /* Put the address of the memory location into our destination
1789 emit_move_sequence (operands, mode, scratch_reg);
1791 /* Now load from the memory location into our destination
1793 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1794 emit_move_sequence (operands, mode, scratch_reg);
1796 /* And add back in the constant part. */
1797 if (const_part != NULL_RTX)
1798 expand_inc (operand0, const_part);
1807 if (reload_in_progress || reload_completed)
1809 temp = scratch_reg ? scratch_reg : operand0;
1810 /* TEMP will hold an address and maybe the actual
1811 data. We want it in WORD_MODE regardless of what mode it
1812 was originally given to us. */
1813 temp = force_mode (word_mode, temp);
1816 temp = gen_reg_rtx (Pmode);
1818 /* (const (plus (symbol) (const_int))) must be forced to
1819 memory during/after reload if the const_int will not fit
1821 if (GET_CODE (operand1) == CONST
1822 && GET_CODE (XEXP (operand1, 0)) == PLUS
1823 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1824 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1825 && (reload_completed || reload_in_progress)
1828 rtx const_mem = force_const_mem (mode, operand1);
1829 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1831 operands[1] = replace_equiv_address (const_mem, operands[1]);
1832 emit_move_sequence (operands, mode, temp);
1836 operands[1] = legitimize_pic_address (operand1, mode, temp);
1837 if (REG_P (operand0) && REG_P (operands[1]))
1838 copy_reg_pointer (operand0, operands[1]);
1839 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1842 /* On the HPPA, references to data space are supposed to use dp,
1843 register 27, but showing it in the RTL inhibits various cse
1844 and loop optimizations. */
1849 if (reload_in_progress || reload_completed)
1851 temp = scratch_reg ? scratch_reg : operand0;
1852 /* TEMP will hold an address and maybe the actual
1853 data. We want it in WORD_MODE regardless of what mode it
1854 was originally given to us. */
1855 temp = force_mode (word_mode, temp);
1858 temp = gen_reg_rtx (mode);
1860 /* Loading a SYMBOL_REF into a register makes that register
1861 safe to be used as the base in an indexed address.
1863 Don't mark hard registers though. That loses. */
1864 if (GET_CODE (operand0) == REG
1865 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1866 mark_reg_pointer (operand0, BITS_PER_UNIT);
1867 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1868 mark_reg_pointer (temp, BITS_PER_UNIT);
1871 set = gen_rtx_SET (mode, operand0, temp);
1873 set = gen_rtx_SET (VOIDmode,
1875 gen_rtx_LO_SUM (mode, temp, operand1));
1877 emit_insn (gen_rtx_SET (VOIDmode,
1879 gen_rtx_HIGH (mode, operand1)));
1885 else if (pa_tls_referenced_p (operand1))
1890 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1892 addend = XEXP (XEXP (tmp, 0), 1);
1893 tmp = XEXP (XEXP (tmp, 0), 0);
1896 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1897 tmp = legitimize_tls_address (tmp);
1900 tmp = gen_rtx_PLUS (mode, tmp, addend);
1901 tmp = force_operand (tmp, operands[0]);
1905 else if (GET_CODE (operand1) != CONST_INT
1906 || !cint_ok_for_move (INTVAL (operand1)))
1910 HOST_WIDE_INT value = 0;
1911 HOST_WIDE_INT insv = 0;
1914 if (GET_CODE (operand1) == CONST_INT)
1915 value = INTVAL (operand1);
1918 && GET_CODE (operand1) == CONST_INT
1919 && HOST_BITS_PER_WIDE_INT > 32
1920 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1924 /* Extract the low order 32 bits of the value and sign extend.
1925 If the new value is the same as the original value, we can
 1926 use the original value as-is. If the new value is
1927 different, we use it and insert the most-significant 32-bits
1928 of the original value into the final result. */
1929 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1930 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1933 #if HOST_BITS_PER_WIDE_INT > 32
1934 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1938 operand1 = GEN_INT (nval);
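	  /* Added example (not in the original): for VALUE = 0x123456789
	     the expression above keeps the low 32 bits, 0x23456789, and
	     the xor/subtract pair sign-extends them (bit 31 is clear, so
	     nval stays 0x23456789); insv receives the remaining high
	     bits, 0x1, which are inserted into the result further down.  */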
1942 if (reload_in_progress || reload_completed)
1943 temp = scratch_reg ? scratch_reg : operand0;
1945 temp = gen_reg_rtx (mode);
1947 /* We don't directly split DImode constants on 32-bit targets
1948 because PLUS uses an 11-bit immediate and the insn sequence
1949 generated is not as efficient as the one using HIGH/LO_SUM. */
1950 if (GET_CODE (operand1) == CONST_INT
1951 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1952 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1955 /* Directly break constant into high and low parts. This
1956 provides better optimization opportunities because various
1957 passes recognize constants split with PLUS but not LO_SUM.
1958 We use a 14-bit signed low part except when the addition
1959 of 0x4000 to the high part might change the sign of the
1961 HOST_WIDE_INT low = value & 0x3fff;
1962 HOST_WIDE_INT high = value & ~ 0x3fff;
1966 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1974 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1975 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
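	  /* Added example (not in the original): VALUE = 0x54321 splits
	     into high = 0x54000 and low = 0x321; the high part loads in
	     one instruction (its low 11 bits are zero, so ldil suffices)
	     and the 14-bit low part is added back by the PLUS emitted
	     here, so the whole constant takes two instructions.  */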
1979 emit_insn (gen_rtx_SET (VOIDmode, temp,
1980 gen_rtx_HIGH (mode, operand1)));
1981 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1984 insn = emit_move_insn (operands[0], operands[1]);
1986 /* Now insert the most significant 32 bits of the value
1987 into the register. When we don't have a second register
1988 available, it could take up to nine instructions to load
1989 a 64-bit integer constant. Prior to reload, we force
1990 constants that would take more than three instructions
1991 to load to the constant pool. During and after reload,
1992 we have to handle all possible values. */
1995 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1996 register and the value to be inserted is outside the
1997 range that can be loaded with three depdi instructions. */
1998 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2000 operand1 = GEN_INT (insv);
2002 emit_insn (gen_rtx_SET (VOIDmode, temp,
2003 gen_rtx_HIGH (mode, operand1)));
2004 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2005 emit_insn (gen_insv (operand0, GEN_INT (32),
2010 int len = 5, pos = 27;
2012 /* Insert the bits using the depdi instruction. */
2015 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2016 HOST_WIDE_INT sign = v5 < 0;
2018 /* Left extend the insertion. */
2019 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2020 while (pos > 0 && (insv & 1) == sign)
2022 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2027 emit_insn (gen_insv (operand0, GEN_INT (len),
2028 GEN_INT (pos), GEN_INT (v5)));
2030 len = pos > 0 && pos < 5 ? pos : 5;
2036 set_unique_reg_note (insn, REG_EQUAL, op1);
2041 /* Now have insn-emit do whatever it normally does. */
2045 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2046 it will need a link/runtime reloc). */
2049 reloc_needed (tree exp)
2053 switch (TREE_CODE (exp))
2058 case POINTER_PLUS_EXPR:
2061 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2062 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2067 case NON_LVALUE_EXPR:
2068 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2074 unsigned HOST_WIDE_INT ix;
2076 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2078 reloc |= reloc_needed (value);
2091 /* Does operand (which is a symbolic_operand) live in text space?
2092 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2096 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2098 if (GET_CODE (operand) == CONST)
2099 operand = XEXP (XEXP (operand, 0), 0);
2102 if (GET_CODE (operand) == SYMBOL_REF)
2103 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2107 if (GET_CODE (operand) == SYMBOL_REF)
2108 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2114 /* Return the best assembler insn template
2115 for moving operands[1] into operands[0] as a fullword. */
2117 singlemove_string (rtx *operands)
2119 HOST_WIDE_INT intval;
2121 if (GET_CODE (operands[0]) == MEM)
2122 return "stw %r1,%0";
2123 if (GET_CODE (operands[1]) == MEM)
2125 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2130 gcc_assert (GET_MODE (operands[1]) == SFmode);
2132 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2134 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2135 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2137 operands[1] = GEN_INT (i);
2138 /* Fall through to CONST_INT case. */
2140 if (GET_CODE (operands[1]) == CONST_INT)
2142 intval = INTVAL (operands[1]);
2144 if (VAL_14_BITS_P (intval))
2146 else if ((intval & 0x7ff) == 0)
2147 return "ldil L'%1,%0";
2148 else if (zdepi_cint_p (intval))
2149 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2151 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2153 return "copy %1,%0";
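  /* For instance, a constant with its low eleven bits clear, such as
     0x12345800, is loaded with a single ldil; a short contiguous bit
     string such as 0x00038000 is generated with the zdepi/depwi,z form;
     and a constant satisfying neither test falls back to the two-insn
     ldil/ldo pair.  Constants that fit in 14 bits are handled by the
     earlier branch and never reach these cases.  */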
2157 /* Compute position (in OP[1]) and width (in OP[2])
2158 useful for copying IMM to a register using the zdepi
2159 instructions. Store the immediate value to insert in OP[0]. */
2161 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2165 /* Find the least significant set bit in IMM. */
2166 for (lsb = 0; lsb < 32; lsb++)
2173 /* Choose variants based on *sign* of the 5-bit field. */
2174 if ((imm & 0x10) == 0)
2175 len = (lsb <= 28) ? 4 : 32 - lsb;
2178 /* Find the width of the bitstring in IMM. */
2179 for (len = 5; len < 32; len++)
2181 if ((imm & (1 << len)) == 0)
2185 /* Sign extend IMM as a 5-bit value. */
2186 imm = (imm & 0xf) - 0x10;
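      /* For example, IMM = 0x00038000 is the bit string 0b111 starting at
	 bit 15; viewed as a 5-bit field its sign bit (0x10) is clear, so
	 the short positive variant with len = 4 is chosen.  IMM = 0x001f0000
	 has five consecutive ones with bit 4 of the field set, so the width
	 loop and the 5-bit sign extension above are used instead.  */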
2194 /* Compute position (in OP[1]) and width (in OP[2])
2195 useful for copying IMM to a register using the depdi,z
2196 instructions. Store the immediate value to insert in OP[0]. */
2198 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2200 HOST_WIDE_INT lsb, len;
2202 /* Find the least significant set bit in IMM. */
2203 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2210 /* Choose variants based on *sign* of the 5-bit field. */
2211 if ((imm & 0x10) == 0)
2212 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2213 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2216 /* Find the width of the bitstring in IMM. */
2217 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2219 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2223 /* Sign extend IMM as a 5-bit value. */
2224 imm = (imm & 0xf) - 0x10;
2232 /* Output assembler code to perform a doubleword move insn
2233 with operands OPERANDS. */
2236 output_move_double (rtx *operands)
2238 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2240 rtx addreg0 = 0, addreg1 = 0;
2242 /* First classify both operands. */
2244 if (REG_P (operands[0]))
2246 else if (offsettable_memref_p (operands[0]))
2248 else if (GET_CODE (operands[0]) == MEM)
2253 if (REG_P (operands[1]))
2255 else if (CONSTANT_P (operands[1]))
2257 else if (offsettable_memref_p (operands[1]))
2259 else if (GET_CODE (operands[1]) == MEM)
2264 /* Check for the cases that the operand constraints are not
2265 supposed to allow. */
2266 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2268 /* Handle copies between general and floating registers. */
2270 if (optype0 == REGOP && optype1 == REGOP
2271 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2273 if (FP_REG_P (operands[0]))
2275 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2276 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2277 return "{fldds|fldd} -16(%%sp),%0";
2281 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2282 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2283 return "{ldws|ldw} -12(%%sp),%R0";
2287 /* Handle auto decrementing and incrementing loads and stores
2288 specifically, since the structure of the function doesn't work
2289 for them without major modification. Do this better once this port
2290 learns about the general inc/dec addressing of PA.
2291 (This was written by tege. Chide him if it doesn't work.) */
2293 if (optype0 == MEMOP)
2295 /* We have to output the address syntax ourselves, since print_operand
2296 doesn't deal with the addresses we want to use. Fix this later. */
2298 rtx addr = XEXP (operands[0], 0);
2299 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2301 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2303 operands[0] = XEXP (addr, 0);
2304 gcc_assert (GET_CODE (operands[1]) == REG
2305 && GET_CODE (operands[0]) == REG);
2307 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2309 /* No overlap between high target register and address
2310 register. (We do this in a non-obvious way to
2311 save a register file writeback) */
2312 if (GET_CODE (addr) == POST_INC)
2313 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2314 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
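	  /* In the POST_INC case, for instance, the first store writes the
	     most significant word at the original address and bumps the base
	     register by 8; the second store then places the other word at
	     original+4 through the -4 displacement, leaving the pointer past
	     the doubleword as POST_INC requires.  */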
2316 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2318 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2320 operands[0] = XEXP (addr, 0);
2321 gcc_assert (GET_CODE (operands[1]) == REG
2322 && GET_CODE (operands[0]) == REG);
2324 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2325 /* No overlap between high target register and address
2326 register. (We do this in a non-obvious way to save a
2327 register file writeback) */
2328 if (GET_CODE (addr) == PRE_INC)
2329 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2330 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2333 if (optype1 == MEMOP)
2335 /* We have to output the address syntax ourselves, since print_operand
2336 doesn't deal with the addresses we want to use. Fix this later. */
2338 rtx addr = XEXP (operands[1], 0);
2339 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2341 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2343 operands[1] = XEXP (addr, 0);
2344 gcc_assert (GET_CODE (operands[0]) == REG
2345 && GET_CODE (operands[1]) == REG);
2347 if (!reg_overlap_mentioned_p (high_reg, addr))
2349 /* No overlap between high target register and address
2350 register. (We do this in a non-obvious way to
2351 save a register file writeback) */
2352 if (GET_CODE (addr) == POST_INC)
2353 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2354 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2358 /* This is an undefined situation. We should load into the
2359 address register *and* update that register. Probably
2360 we don't need to handle this at all. */
2361 if (GET_CODE (addr) == POST_INC)
2362 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2363 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2366 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2368 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2370 operands[1] = XEXP (addr, 0);
2371 gcc_assert (GET_CODE (operands[0]) == REG
2372 && GET_CODE (operands[1]) == REG);
2374 if (!reg_overlap_mentioned_p (high_reg, addr))
2376 /* No overlap between high target register and address
2377 register. (We do this in a non-obvious way to
2378 save a register file writeback) */
2379 if (GET_CODE (addr) == PRE_INC)
2380 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2381 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2385 /* This is an undefined situation. We should load into the
2386 address register *and* update that register. Probably
2387 we don't need to handle this at all. */
2388 if (GET_CODE (addr) == PRE_INC)
2389 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2390 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2393 else if (GET_CODE (addr) == PLUS
2394 && GET_CODE (XEXP (addr, 0)) == MULT)
2397 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2399 if (!reg_overlap_mentioned_p (high_reg, addr))
2401 xoperands[0] = high_reg;
2402 xoperands[1] = XEXP (addr, 1);
2403 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2404 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2405 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2407 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2411 xoperands[0] = high_reg;
2412 xoperands[1] = XEXP (addr, 1);
2413 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2414 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2415 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2417 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2422 /* If an operand is an unoffsettable memory ref, find a register
2423 we can increment temporarily to make it refer to the second word. */
2425 if (optype0 == MEMOP)
2426 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2428 if (optype1 == MEMOP)
2429 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2431 /* Ok, we can do one word at a time.
2432 Normally we do the low-numbered word first.
2434 In either case, set up in LATEHALF the operands to use
2435 for the high-numbered word and in some cases alter the
2436 operands in OPERANDS to be suitable for the low-numbered word. */
2438 if (optype0 == REGOP)
2439 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2440 else if (optype0 == OFFSOP)
2441 latehalf[0] = adjust_address (operands[0], SImode, 4);
2443 latehalf[0] = operands[0];
2445 if (optype1 == REGOP)
2446 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2447 else if (optype1 == OFFSOP)
2448 latehalf[1] = adjust_address (operands[1], SImode, 4);
2449 else if (optype1 == CNSTOP)
2450 split_double (operands[1], &operands[1], &latehalf[1]);
2452 latehalf[1] = operands[1];
2454 /* If the first move would clobber the source of the second one,
2455 do them in the other order.
2457 This can happen in two cases:
2459 mem -> register where the first half of the destination register
2460 is the same register used in the memory's address. Reload
2461 can create such insns.
2463 mem in this case will be either register indirect or register
2464 indirect plus a valid offset.
2466 register -> register move where REGNO(dst) == REGNO(src + 1)
2467 someone (Tim/Tege?) claimed this can happen for parameter loads.
2469 Handle mem -> register case first. */
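  /* For example, a reload-generated DImode load into %r4/%r5 whose source
     address is held in %r4 itself must not overwrite %r4 first: the word at
     offset 4 is loaded into %r5 before the word at offset 0 finally replaces
     the address in %r4.  (The register numbers here are purely
     illustrative.)  */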
2470 if (optype0 == REGOP
2471 && (optype1 == MEMOP || optype1 == OFFSOP)
2472 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2475 /* Do the late half first. */
2477 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2478 output_asm_insn (singlemove_string (latehalf), latehalf);
2482 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2483 return singlemove_string (operands);
2486 /* Now handle register -> register case. */
2487 if (optype0 == REGOP && optype1 == REGOP
2488 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2490 output_asm_insn (singlemove_string (latehalf), latehalf);
2491 return singlemove_string (operands);
2494 /* Normal case: do the two words, low-numbered first. */
2496 output_asm_insn (singlemove_string (operands), operands);
2498 /* Make any unoffsettable addresses point at high-numbered word. */
2500 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2502 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2505 output_asm_insn (singlemove_string (latehalf), latehalf);
2507 /* Undo the adds we just did. */
2509 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2511 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2517 output_fp_move_double (rtx *operands)
2519 if (FP_REG_P (operands[0]))
2521 if (FP_REG_P (operands[1])
2522 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2523 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2525 output_asm_insn ("fldd%F1 %1,%0", operands);
2527 else if (FP_REG_P (operands[1]))
2529 output_asm_insn ("fstd%F0 %1,%0", operands);
2535 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2537 /* This is a pain. You have to be prepared to deal with an
2538 arbitrary address here including pre/post increment/decrement.
2540 so avoid this in the MD. */
2541 gcc_assert (GET_CODE (operands[0]) == REG);
2543 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2544 xoperands[0] = operands[0];
2545 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2550 /* Return a REG that occurs in ADDR with coefficient 1.
2551 ADDR can be effectively incremented by incrementing REG. */
2554 find_addr_reg (rtx addr)
2556 while (GET_CODE (addr) == PLUS)
2558 if (GET_CODE (XEXP (addr, 0)) == REG)
2559 addr = XEXP (addr, 0);
2560 else if (GET_CODE (XEXP (addr, 1)) == REG)
2561 addr = XEXP (addr, 1);
2562 else if (CONSTANT_P (XEXP (addr, 0)))
2563 addr = XEXP (addr, 1);
2564 else if (CONSTANT_P (XEXP (addr, 1)))
2565 addr = XEXP (addr, 0);
2569 gcc_assert (GET_CODE (addr) == REG);
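  /* For example, for an address such as (plus (reg 3) (const_int 64)) the
     loop above discards the constant term and returns the register.  */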
2573 /* Emit code to perform a block move.
2575 OPERANDS[0] is the destination pointer as a REG, clobbered.
2576 OPERANDS[1] is the source pointer as a REG, clobbered.
2577 OPERANDS[2] is a register for temporary storage.
2578 OPERANDS[3] is a register for temporary storage.
2579 OPERANDS[4] is the size as a CONST_INT
2580 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2581 OPERANDS[6] is another temporary register. */
2584 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2586 int align = INTVAL (operands[5]);
2587 unsigned long n_bytes = INTVAL (operands[4]);
2589 /* We can't move more than a word at a time because the PA
2590 has no integer move insns longer than a word. (Could use fp mem ops?) */
2591 if (align > (TARGET_64BIT ? 8 : 4))
2592 align = (TARGET_64BIT ? 8 : 4);
2594 /* Note that we know each loop below will execute at least twice
2595 (else we would have open-coded the copy). */
2599 /* Pre-adjust the loop counter. */
2600 operands[4] = GEN_INT (n_bytes - 16);
2601 output_asm_insn ("ldi %4,%2", operands);
2604 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2605 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2606 output_asm_insn ("std,ma %3,8(%0)", operands);
2607 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2608 output_asm_insn ("std,ma %6,8(%0)", operands);
2610 /* Handle the residual. There could be up to 7 bytes of
2611 residual to copy! */
2612 if (n_bytes % 16 != 0)
2614 operands[4] = GEN_INT (n_bytes % 8);
2615 if (n_bytes % 16 >= 8)
2616 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2617 if (n_bytes % 8 != 0)
2618 output_asm_insn ("ldd 0(%1),%6", operands);
2619 if (n_bytes % 16 >= 8)
2620 output_asm_insn ("std,ma %3,8(%0)", operands);
2621 if (n_bytes % 8 != 0)
2622 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2627 /* Pre-adjust the loop counter. */
2628 operands[4] = GEN_INT (n_bytes - 8);
2629 output_asm_insn ("ldi %4,%2", operands);
2632 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2633 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2634 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2635 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2636 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2638 /* Handle the residual. There could be up to 7 bytes of
2639 residual to copy! */
2640 if (n_bytes % 8 != 0)
2642 operands[4] = GEN_INT (n_bytes % 4);
2643 if (n_bytes % 8 >= 4)
2644 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2645 if (n_bytes % 4 != 0)
2646 output_asm_insn ("ldw 0(%1),%6", operands);
2647 if (n_bytes % 8 >= 4)
2648 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2649 if (n_bytes % 4 != 0)
2650 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
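      /* As a worked example, a 22-byte copy with 4-byte alignment uses the
	 word loop above: the counter is preloaded with 14 (22 - 8), each
	 iteration moves 8 bytes through the two temporaries, and the
	 residual code then copies one more word (22 % 8 >= 4) and the final
	 2 bytes with the {stbys|stby},e store.  */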
2655 /* Pre-adjust the loop counter. */
2656 operands[4] = GEN_INT (n_bytes - 4);
2657 output_asm_insn ("ldi %4,%2", operands);
2660 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2661 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2662 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2663 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2664 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2666 /* Handle the residual. */
2667 if (n_bytes % 4 != 0)
2669 if (n_bytes % 4 >= 2)
2670 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2671 if (n_bytes % 2 != 0)
2672 output_asm_insn ("ldb 0(%1),%6", operands);
2673 if (n_bytes % 4 >= 2)
2674 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2675 if (n_bytes % 2 != 0)
2676 output_asm_insn ("stb %6,0(%0)", operands);
2681 /* Pre-adjust the loop counter. */
2682 operands[4] = GEN_INT (n_bytes - 2);
2683 output_asm_insn ("ldi %4,%2", operands);
2686 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2687 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2688 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2689 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2690 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2692 /* Handle the residual. */
2693 if (n_bytes % 2 != 0)
2695 output_asm_insn ("ldb 0(%1),%3", operands);
2696 output_asm_insn ("stb %3,0(%0)", operands);
2705 /* Count the number of insns necessary to handle this block move.
2707 Basic structure is the same as output_block_move, except that we
2708 count insns rather than emit them. */
2711 compute_movmem_length (rtx insn)
2713 rtx pat = PATTERN (insn);
2714 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2715 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2716 unsigned int n_insns = 0;
2718 /* We can't move more than a word at a time because the PA
2719 has no integer move insns longer than a word. (Could use fp mem ops?) */
2720 if (align > (TARGET_64BIT ? 8 : 4))
2721 align = (TARGET_64BIT ? 8 : 4);
2723 /* The basic copying loop. */
2727 if (n_bytes % (2 * align) != 0)
2729 if ((n_bytes % (2 * align)) >= align)
2732 if ((n_bytes % align) != 0)
2736 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2740 /* Emit code to perform a block clear.
2742 OPERANDS[0] is the destination pointer as a REG, clobbered.
2743 OPERANDS[1] is a register for temporary storage.
2744 OPERANDS[2] is the size as a CONST_INT
2745 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2748 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2750 int align = INTVAL (operands[3]);
2751 unsigned long n_bytes = INTVAL (operands[2]);
2753 /* We can't clear more than a word at a time because the PA
2754 has no integer move insns longer than a word. */
2755 if (align > (TARGET_64BIT ? 8 : 4))
2756 align = (TARGET_64BIT ? 8 : 4);
2758 /* Note that we know each loop below will execute at least twice
2759 (else we would have open-coded the clear).
2763 /* Pre-adjust the loop counter. */
2764 operands[2] = GEN_INT (n_bytes - 16);
2765 output_asm_insn ("ldi %2,%1", operands);
2768 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2769 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2770 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2772 /* Handle the residual. There could be up to 7 bytes of
2773 residual to copy! */
2774 if (n_bytes % 16 != 0)
2776 operands[2] = GEN_INT (n_bytes % 8);
2777 if (n_bytes % 16 >= 8)
2778 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2779 if (n_bytes % 8 != 0)
2780 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2785 /* Pre-adjust the loop counter. */
2786 operands[2] = GEN_INT (n_bytes - 8);
2787 output_asm_insn ("ldi %2,%1", operands);
2790 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2791 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2792 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2794 /* Handle the residual. There could be up to 7 bytes of
2795 residual to copy! */
2796 if (n_bytes % 8 != 0)
2798 operands[2] = GEN_INT (n_bytes % 4);
2799 if (n_bytes % 8 >= 4)
2800 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2801 if (n_bytes % 4 != 0)
2802 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2807 /* Pre-adjust the loop counter. */
2808 operands[2] = GEN_INT (n_bytes - 4);
2809 output_asm_insn ("ldi %2,%1", operands);
2812 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2813 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2814 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2816 /* Handle the residual. */
2817 if (n_bytes % 4 != 0)
2819 if (n_bytes % 4 >= 2)
2820 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2821 if (n_bytes % 2 != 0)
2822 output_asm_insn ("stb %%r0,0(%0)", operands);
2827 /* Pre-adjust the loop counter. */
2828 operands[2] = GEN_INT (n_bytes - 2);
2829 output_asm_insn ("ldi %2,%1", operands);
2832 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2833 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2834 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2836 /* Handle the residual. */
2837 if (n_bytes % 2 != 0)
2838 output_asm_insn ("stb %%r0,0(%0)", operands);
2847 /* Count the number of insns necessary to handle this block clear.
2849 Basic structure is the same as output_block_clear, except that we
2850 count insns rather than emit them. */
2853 compute_clrmem_length (rtx insn)
2855 rtx pat = PATTERN (insn);
2856 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2857 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2858 unsigned int n_insns = 0;
2860 /* We can't clear more than a word at a time because the PA
2861 has no integer move insns longer than a word. */
2862 if (align > (TARGET_64BIT ? 8 : 4))
2863 align = (TARGET_64BIT ? 8 : 4);
2865 /* The basic loop. */
2869 if (n_bytes % (2 * align) != 0)
2871 if ((n_bytes % (2 * align)) >= align)
2874 if ((n_bytes % align) != 0)
2878 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2884 output_and (rtx *operands)
2886 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2888 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2889 int ls0, ls1, ms0, p, len;
2891 for (ls0 = 0; ls0 < 32; ls0++)
2892 if ((mask & (1 << ls0)) == 0)
2895 for (ls1 = ls0; ls1 < 32; ls1++)
2896 if ((mask & (1 << ls1)) != 0)
2899 for (ms0 = ls1; ms0 < 32; ms0++)
2900 if ((mask & (1 << ms0)) == 0)
2903 gcc_assert (ms0 == 32);
2911 operands[2] = GEN_INT (len);
2912 return "{extru|extrw,u} %1,31,%2,%0";
2916 /* We could use this `depi' for the case above as well, but `depi'
2917 requires one more register file access than an `extru'. */
2922 operands[2] = GEN_INT (p);
2923 operands[3] = GEN_INT (len);
2924 return "{depi|depwi} 0,%2,%3,%0";
2928 return "and %1,%2,%0";
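  /* For example, an AND mask of 0x000000ff is a run of ones at the bottom
     of the word and is done with the zero-extracting extrw,u form above,
     while a mask such as 0xffff00ff (ones with an 8-bit hole) instead
     clears the hole in place by depositing zeros with depwi.  */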
2931 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2932 storing the result in operands[0]. */
2934 output_64bit_and (rtx *operands)
2936 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2938 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2939 int ls0, ls1, ms0, p, len;
2941 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2942 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2945 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2946 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2949 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2950 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2953 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2955 if (ls1 == HOST_BITS_PER_WIDE_INT)
2961 operands[2] = GEN_INT (len);
2962 return "extrd,u %1,63,%2,%0";
2966 /* We could use this `depdi' for the case above as well, but `depdi'
2967 requires one more register file access than an `extrd'. */
2972 operands[2] = GEN_INT (p);
2973 operands[3] = GEN_INT (len);
2974 return "depdi 0,%2,%3,%0";
2978 return "and %1,%2,%0";
2982 output_ior (rtx *operands)
2984 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2985 int bs0, bs1, p, len;
2987 if (INTVAL (operands[2]) == 0)
2988 return "copy %1,%0";
2990 for (bs0 = 0; bs0 < 32; bs0++)
2991 if ((mask & (1 << bs0)) != 0)
2994 for (bs1 = bs0; bs1 < 32; bs1++)
2995 if ((mask & (1 << bs1)) == 0)
2998 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3003 operands[2] = GEN_INT (p);
3004 operands[3] = GEN_INT (len);
3005 return "{depi|depwi} -1,%2,%3,%0";
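  /* For example, an IOR mask of 0x00000070 is the contiguous run of bits
     4..6, so a three-bit field of ones is deposited over exactly those
     bits with the depwi -1 form above.  */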
3008 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3009 storing the result in operands[0]. */
3011 output_64bit_ior (rtx *operands)
3013 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3014 int bs0, bs1, p, len;
3016 if (INTVAL (operands[2]) == 0)
3017 return "copy %1,%0";
3019 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3020 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3023 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3024 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3027 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3028 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3033 operands[2] = GEN_INT (p);
3034 operands[3] = GEN_INT (len);
3035 return "depdi -1,%2,%3,%0";
3038 /* Target hook for assembling integer objects. This code handles
3039 aligned SI and DI integers specially since function references
3040 must be preceded by P%. */
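/* For instance, a word-sized, aligned reference to a function foo is
   emitted as "\t.word\tP%foo" (or ".dword" when the word size is 8);
   everything else falls through to the default handler.  The name foo is
   of course just an illustration.  */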
3043 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3045 if (size == UNITS_PER_WORD
3047 && function_label_operand (x, VOIDmode))
3049 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3050 output_addr_const (asm_out_file, x);
3051 fputc ('\n', asm_out_file);
3054 return default_assemble_integer (x, size, aligned_p);
3057 /* Output an ascii string. */
3059 output_ascii (FILE *file, const char *p, int size)
3063 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3065 /* The HP assembler can only take strings of 256 characters at one
3066 time. This is a limitation on input line length, *not* the
3067 length of the string. Sigh. Even worse, it seems that the
3068 restriction is in number of input characters (see \xnn &
3069 \whatever). So we have to do this very carefully. */
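  /* For example, a quote or backslash is emitted with a preceding
     backslash, and a non-printing byte such as 0x07 expands to the four
     characters \x07; that is why the buffer is flushed and a fresh
     .STRING directive started well before the 256-character line limit
     is reached.  */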
3071 fputs ("\t.STRING \"", file);
3074 for (i = 0; i < size; i += 4)
3078 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3080 register unsigned int c = (unsigned char) p[i + io];
3082 if (c == '\"' || c == '\\')
3083 partial_output[co++] = '\\';
3084 if (c >= ' ' && c < 0177)
3085 partial_output[co++] = c;
3089 partial_output[co++] = '\\';
3090 partial_output[co++] = 'x';
3091 hexd = c / 16 - 0 + '0';
3093 hexd -= '9' - 'a' + 1;
3094 partial_output[co++] = hexd;
3095 hexd = c % 16 - 0 + '0';
3097 hexd -= '9' - 'a' + 1;
3098 partial_output[co++] = hexd;
3101 if (chars_output + co > 243)
3103 fputs ("\"\n\t.STRING \"", file);
3106 fwrite (partial_output, 1, (size_t) co, file);
3110 fputs ("\"\n", file);
3113 /* Try to rewrite floating point comparisons & branches to avoid
3114 useless add,tr insns.
3116 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3117 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3118 first attempt to remove useless add,tr insns. It is zero
3119 for the second pass as reorg sometimes leaves bogus REG_DEAD
3122 When CHECK_NOTES is zero we can only eliminate add,tr insns
3123 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3126 remove_useless_addtr_insns (int check_notes)
3129 static int pass = 0;
3131 /* This is fairly cheap, so always run it when optimizing. */
3135 int fbranch_count = 0;
3137 /* Walk all the insns in this function looking for fcmp & fbranch
3138 instructions. Keep track of how many of each we find. */
3139 for (insn = get_insns (); insn; insn = next_insn (insn))
3143 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3144 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3147 tmp = PATTERN (insn);
3149 /* It must be a set. */
3150 if (GET_CODE (tmp) != SET)
3153 /* If the destination is CCFP, then we've found an fcmp insn. */
3154 tmp = SET_DEST (tmp);
3155 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3161 tmp = PATTERN (insn);
3162 /* If this is an fbranch instruction, bump the fbranch counter. */
3163 if (GET_CODE (tmp) == SET
3164 && SET_DEST (tmp) == pc_rtx
3165 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3166 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3167 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3168 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3176 /* Find all floating point compare + branch insns. If possible,
3177 reverse the comparison & the branch to avoid add,tr insns. */
3178 for (insn = get_insns (); insn; insn = next_insn (insn))
3182 /* Ignore anything that isn't an INSN. */
3183 if (GET_CODE (insn) != INSN)
3186 tmp = PATTERN (insn);
3188 /* It must be a set. */
3189 if (GET_CODE (tmp) != SET)
3192 /* The destination must be CCFP, which is register zero. */
3193 tmp = SET_DEST (tmp);
3194 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3197 /* INSN should be a set of CCFP.
3199 See if the result of this insn is used in a reversed FP
3200 conditional branch. If so, reverse our condition and
3201 the branch. Doing so avoids useless add,tr insns. */
3202 next = next_insn (insn);
3205 /* Jumps, calls and labels stop our search. */
3206 if (GET_CODE (next) == JUMP_INSN
3207 || GET_CODE (next) == CALL_INSN
3208 || GET_CODE (next) == CODE_LABEL)
3211 /* As does another fcmp insn. */
3212 if (GET_CODE (next) == INSN
3213 && GET_CODE (PATTERN (next)) == SET
3214 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3215 && REGNO (SET_DEST (PATTERN (next))) == 0)
3218 next = next_insn (next);
3221 /* Is NEXT_INSN a branch? */
3223 && GET_CODE (next) == JUMP_INSN)
3225 rtx pattern = PATTERN (next);
3227 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3228 and CCFP dies, then reverse our conditional and the branch
3229 to avoid the add,tr. */
3230 if (GET_CODE (pattern) == SET
3231 && SET_DEST (pattern) == pc_rtx
3232 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3233 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3234 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3235 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3236 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3237 && (fcmp_count == fbranch_count
3239 && find_regno_note (next, REG_DEAD, 0))))
3241 /* Reverse the branch. */
3242 tmp = XEXP (SET_SRC (pattern), 1);
3243 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3244 XEXP (SET_SRC (pattern), 2) = tmp;
3245 INSN_CODE (next) = -1;
3247 /* Reverse our condition. */
3248 tmp = PATTERN (insn);
3249 PUT_CODE (XEXP (tmp, 1),
3250 (reverse_condition_maybe_unordered
3251 (GET_CODE (XEXP (tmp, 1)))));
3261 /* You may have trouble believing this, but this is the 32 bit HP-PA
3266 Variable arguments (optional; any number may be allocated)
3268 SP-(4*(N+9)) arg word N
3273 Fixed arguments (must be allocated; may remain unused)
3282 SP-32 External Data Pointer (DP)
3284 SP-24 External/stub RP (RP')
3288 SP-8 Calling Stub RP (RP'')
3293 SP-0 Stack Pointer (points to next available address)
3297 /* This function saves registers as follows. Registers marked with ' are
3298 this function's registers (as opposed to the previous function's).
3299 If a frame_pointer isn't needed, r4 is saved as a general register;
3300 the space for the frame pointer is still allocated, though, to keep
3306 SP (FP') Previous FP
3307 SP + 4 Alignment filler (sigh)
3308 SP + 8 Space for locals reserved here.
3312 SP + n All call saved register used.
3316 SP + o All call saved fp registers used.
3320 SP + p (SP') points to next available address.
3324 /* Global variables set by output_function_prologue(). */
3325 /* Size of frame. Need to know this to emit return insns from
3327 static HOST_WIDE_INT actual_fsize, local_fsize;
3328 static int save_fregs;
3330 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3331 Handle case where DISP > 8k by using the add_high_const patterns.
3333 Note in DISP > 8k case, we will leave the high part of the address
3334 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
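/* For example, a displacement of 0x5000 is too large for the 14-bit short
   form, so the code below forms the high part of BASE+DISP in %r1 and does
   the store through a LO_SUM address off that register.  */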
3337 store_reg (int reg, HOST_WIDE_INT disp, int base)
3339 rtx insn, dest, src, basereg;
3341 src = gen_rtx_REG (word_mode, reg);
3342 basereg = gen_rtx_REG (Pmode, base);
3343 if (VAL_14_BITS_P (disp))
3345 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3346 insn = emit_move_insn (dest, src);
3348 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3350 rtx delta = GEN_INT (disp);
3351 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3353 emit_move_insn (tmpreg, delta);
3354 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3358 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3359 gen_rtx_SET (VOIDmode, tmpreg,
3360 gen_rtx_PLUS (Pmode, basereg, delta)),
3362 RTX_FRAME_RELATED_P (insn) = 1;
3364 dest = gen_rtx_MEM (word_mode, tmpreg);
3365 insn = emit_move_insn (dest, src);
3369 rtx delta = GEN_INT (disp);
3370 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3371 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3373 emit_move_insn (tmpreg, high);
3374 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3375 insn = emit_move_insn (dest, src);
3379 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3380 gen_rtx_SET (VOIDmode,
3381 gen_rtx_MEM (word_mode,
3382 gen_rtx_PLUS (word_mode, basereg,
3390 RTX_FRAME_RELATED_P (insn) = 1;
3393 /* Emit RTL to store REG at the memory location specified by BASE and then
3394 add MOD to BASE. MOD must be <= 8k. */
3397 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3399 rtx insn, basereg, srcreg, delta;
3401 gcc_assert (VAL_14_BITS_P (mod));
3403 basereg = gen_rtx_REG (Pmode, base);
3404 srcreg = gen_rtx_REG (word_mode, reg);
3405 delta = GEN_INT (mod);
3407 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3410 RTX_FRAME_RELATED_P (insn) = 1;
3412 /* RTX_FRAME_RELATED_P must be set on each frame related set
3413 in a parallel with more than one element. */
3414 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3415 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3419 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3420 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3421 whether to add a frame note or not.
3423 In the DISP > 8k case, we leave the high part of the address in %r1.
3424 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3427 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3431 if (VAL_14_BITS_P (disp))
3433 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3434 plus_constant (gen_rtx_REG (Pmode, base), disp));
3436 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3438 rtx basereg = gen_rtx_REG (Pmode, base);
3439 rtx delta = GEN_INT (disp);
3440 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3442 emit_move_insn (tmpreg, delta);
3443 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3444 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3447 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3448 gen_rtx_SET (VOIDmode, tmpreg,
3449 gen_rtx_PLUS (Pmode, basereg, delta)),
3454 rtx basereg = gen_rtx_REG (Pmode, base);
3455 rtx delta = GEN_INT (disp);
3456 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3458 emit_move_insn (tmpreg,
3459 gen_rtx_PLUS (Pmode, basereg,
3460 gen_rtx_HIGH (Pmode, delta)));
3461 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3462 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3465 if (DO_FRAME_NOTES && note)
3466 RTX_FRAME_RELATED_P (insn) = 1;
3470 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3475 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3476 be consistent with the rounding and size calculation done here.
3477 Change them at the same time. */
3479 /* We do our own stack alignment. First, round the size of the
3480 stack locals up to a word boundary. */
3481 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3483 /* Space for previous frame pointer + filler. If any frame is
3484 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3485 waste some space here for the sake of HP compatibility. The
3486 first slot is only used when the frame pointer is needed. */
3487 if (size || frame_pointer_needed)
3488 size += STARTING_FRAME_OFFSET;
3490 /* If the current function calls __builtin_eh_return, then we need
3491 to allocate stack space for registers that will hold data for
3492 the exception handler. */
3493 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3497 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3499 size += i * UNITS_PER_WORD;
3502 /* Account for space used by the callee general register saves. */
3503 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3504 if (df_regs_ever_live_p (i))
3505 size += UNITS_PER_WORD;
3507 /* Account for space used by the callee floating point register saves. */
3508 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3509 if (df_regs_ever_live_p (i)
3510 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3514 /* We always save both halves of the FP register, so always
3515 increment the frame size by 8 bytes. */
3519 /* If any of the floating registers are saved, account for the
3520 alignment needed for the floating point register save block. */
3523 size = (size + 7) & ~7;
3528 /* The various ABIs include space for the outgoing parameters in the
3529 size of the current function's stack frame. We don't need to align
3530 for the outgoing arguments as their alignment is set by the final
3531 rounding for the frame as a whole. */
3532 size += current_function_outgoing_args_size;
3534 /* Allocate space for the fixed frame marker. This space must be
3535 allocated for any function that makes calls or allocates
3537 if (!current_function_is_leaf || size)
3538 size += TARGET_64BIT ? 48 : 32;
3540 /* Finally, round to the preferred stack boundary. */
3541 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3542 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
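/* As a quick example of the arithmetic above: with 4-byte words a request
   for 10 bytes of locals rounds up to 12, callee general and floating
   saves are added a word (or 8 bytes) at a time, 32 or 48 bytes are
   reserved for the fixed frame marker when the function makes calls, and
   the total is finally rounded up to the preferred stack boundary.  */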
3545 /* Generate the assembly code for function entry. FILE is a stdio
3546 stream to output the code to. SIZE is an int: how many units of
3547 temporary storage to allocate.
3549 Refer to the array `regs_ever_live' to determine which registers to
3550 save; `regs_ever_live[I]' is nonzero if register number I is ever
3551 used in the function. This function is responsible for knowing
3552 which registers should not be saved even if used. */
3554 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3555 of memory. If any fpu reg is used in the function, we allocate
3556 such a block here, at the bottom of the frame, just in case it's needed.
3558 If this function is a leaf procedure, then we may choose not
3559 to do a "save" insn. The decision about whether or not
3560 to do this is made in regclass.c. */
3563 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3565 /* The function's label and associated .PROC must never be
3566 separated and must be output *after* any profiling declarations
3567 to avoid changing spaces/subspaces within a procedure. */
3568 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3569 fputs ("\t.PROC\n", file);
3571 /* hppa_expand_prologue does the dirty work now. We just need
3572 to output the assembler directives which denote the start
3574 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3575 if (current_function_is_leaf)
3576 fputs (",NO_CALLS", file);
3578 fputs (",CALLS", file);
3580 fputs (",SAVE_RP", file);
3582 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3583 at the beginning of the frame and that it is used as the frame
3584 pointer for the frame. We do this because our current frame
3585 layout doesn't conform to that specified in the HP runtime
3586 documentation and we need a way to indicate to programs such as
3587 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3588 isn't used by HP compilers but is supported by the assembler.
3589 However, SAVE_SP is supposed to indicate that the previous stack
3590 pointer has been saved in the frame marker. */
3591 if (frame_pointer_needed)
3592 fputs (",SAVE_SP", file);
3594 /* Pass on information about the number of callee register saves
3595 performed in the prologue.
3597 The compiler is supposed to pass the highest register number
3598 saved, the assembler then has to adjust that number before
3599 entering it into the unwind descriptor (to account for any
3600 caller saved registers with lower register numbers than the
3601 first callee saved register). */
3603 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3606 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3608 fputs ("\n\t.ENTRY\n", file);
3610 remove_useless_addtr_insns (0);
3614 hppa_expand_prologue (void)
3616 int merge_sp_adjust_with_store = 0;
3617 HOST_WIDE_INT size = get_frame_size ();
3618 HOST_WIDE_INT offset;
3626 /* Compute total size for frame pointer, filler, locals and rounding to
3627 the next word boundary. Similar code appears in compute_frame_size
3628 and must be changed in tandem with this code. */
3629 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3630 if (local_fsize || frame_pointer_needed)
3631 local_fsize += STARTING_FRAME_OFFSET;
3633 actual_fsize = compute_frame_size (size, &save_fregs);
3635 /* Compute a few things we will use often. */
3636 tmpreg = gen_rtx_REG (word_mode, 1);
3638 /* Save RP first. The calling conventions manual states RP will
3639 always be stored into the caller's frame at sp - 20 or sp - 16
3640 depending on which ABI is in use. */
3641 if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
3643 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3649 /* Allocate the local frame and set up the frame pointer if needed. */
3650 if (actual_fsize != 0)
3652 if (frame_pointer_needed)
3654 /* Copy the old frame pointer temporarily into %r1. Set up the
3655 new stack pointer, then store away the saved old frame pointer
3656 into the stack at sp and at the same time update the stack
3657 pointer by actual_fsize bytes. Two versions, first
3658 handles small (<8k) frames. The second handles large (>=8k)
3660 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3662 RTX_FRAME_RELATED_P (insn) = 1;
3664 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3666 RTX_FRAME_RELATED_P (insn) = 1;
3668 if (VAL_14_BITS_P (actual_fsize))
3669 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3672 /* It is incorrect to store the saved frame pointer at *sp,
3673 then increment sp (writes beyond the current stack boundary).
3675 So instead use stwm to store at *sp and post-increment the
3676 stack pointer as an atomic operation. Then increment sp to
3677 finish allocating the new frame. */
3678 HOST_WIDE_INT adjust1 = 8192 - 64;
3679 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
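	      /* For example, with actual_fsize = 10000 the stwm below
		 advances the stack pointer by adjust1 = 8128 bytes and the
		 follow-up add supplies the remaining adjust2 = 1872.  */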
3681 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3682 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3686 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3687 we need to store the previous stack pointer (frame pointer)
3688 into the frame marker on targets that use the HP unwind
3689 library. This allows the HP unwind library to be used to
3690 unwind GCC frames. However, we are not fully compatible
3691 with the HP library because our frame layout differs from
3692 that specified in the HP runtime specification.
3694 We don't want a frame note on this instruction as the frame
3695 marker moves during dynamic stack allocation.
3697 This instruction also serves as a blockage to prevent
3698 register spills from being scheduled before the stack
3699 pointer is raised. This is necessary as we store
3700 registers using the frame pointer as a base register,
3701 and the frame pointer is set before sp is raised. */
3702 if (TARGET_HPUX_UNWIND_LIBRARY)
3704 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3705 GEN_INT (TARGET_64BIT ? -8 : -4));
3707 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3711 emit_insn (gen_blockage ());
3713 /* no frame pointer needed. */
3716 /* In some cases we can perform the first callee register save
3717 and allocating the stack frame at the same time. If so, just
3718 make a note of it and defer allocating the frame until saving
3719 the callee registers. */
3720 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3721 merge_sp_adjust_with_store = 1;
3722 /* Can not optimize. Adjust the stack frame by actual_fsize
3725 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3730 /* Normal register save.
3732 Do not save the frame pointer in the frame_pointer_needed case. It
3733 was done earlier. */
3734 if (frame_pointer_needed)
3736 offset = local_fsize;
3738 /* Saving the EH return data registers in the frame is the simplest
3739 way to get the frame unwind information emitted. We put them
3740 just before the general registers. */
3741 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3743 unsigned int i, regno;
3747 regno = EH_RETURN_DATA_REGNO (i);
3748 if (regno == INVALID_REGNUM)
3751 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3752 offset += UNITS_PER_WORD;
3756 for (i = 18; i >= 4; i--)
3757 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3759 store_reg (i, offset, FRAME_POINTER_REGNUM);
3760 offset += UNITS_PER_WORD;
3763 /* Account for %r3 which is saved in a special place. */
3766 /* No frame pointer needed. */
3769 offset = local_fsize - actual_fsize;
3771 /* Saving the EH return data registers in the frame is the simplest
3772 way to get the frame unwind information emitted. */
3773 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3775 unsigned int i, regno;
3779 regno = EH_RETURN_DATA_REGNO (i);
3780 if (regno == INVALID_REGNUM)
3783 /* If merge_sp_adjust_with_store is nonzero, then we can
3784 optimize the first save. */
3785 if (merge_sp_adjust_with_store)
3787 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3788 merge_sp_adjust_with_store = 0;
3791 store_reg (regno, offset, STACK_POINTER_REGNUM);
3792 offset += UNITS_PER_WORD;
3796 for (i = 18; i >= 3; i--)
3797 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3799 /* If merge_sp_adjust_with_store is nonzero, then we can
3800 optimize the first GR save. */
3801 if (merge_sp_adjust_with_store)
3803 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3804 merge_sp_adjust_with_store = 0;
3807 store_reg (i, offset, STACK_POINTER_REGNUM);
3808 offset += UNITS_PER_WORD;
3812 /* If we wanted to merge the SP adjustment with a GR save, but we never
3813 did any GR saves, then just emit the adjustment here. */
3814 if (merge_sp_adjust_with_store)
3815 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3819 /* The hppa calling conventions say that %r19, the pic offset
3820 register, is saved at sp - 32 (in this function's frame)
3821 when generating PIC code. FIXME: What is the correct thing
3822 to do for functions which make no calls and allocate no
3823 frame? Do we need to allocate a frame, or can we just omit
3824 the save? For now we'll just omit the save.
3826 We don't want a note on this insn as the frame marker can
3827 move if there is a dynamic stack allocation. */
3828 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3830 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3832 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3836 /* Align pointer properly (doubleword boundary). */
3837 offset = (offset + 7) & ~7;
3839 /* Floating point register store. */
3844 /* First get the frame or stack pointer to the start of the FP register
3846 if (frame_pointer_needed)
3848 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3849 base = frame_pointer_rtx;
3853 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3854 base = stack_pointer_rtx;
3857 /* Now actually save the FP registers. */
3858 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3860 if (df_regs_ever_live_p (i)
3861 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3863 rtx addr, insn, reg;
3864 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3865 reg = gen_rtx_REG (DFmode, i);
3866 insn = emit_move_insn (addr, reg);
3869 RTX_FRAME_RELATED_P (insn) = 1;
3872 rtx mem = gen_rtx_MEM (DFmode,
3873 plus_constant (base, offset));
3875 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3876 gen_rtx_SET (VOIDmode, mem, reg),
3881 rtx meml = gen_rtx_MEM (SFmode,
3882 plus_constant (base, offset));
3883 rtx memr = gen_rtx_MEM (SFmode,
3884 plus_constant (base, offset + 4));
3885 rtx regl = gen_rtx_REG (SFmode, i);
3886 rtx regr = gen_rtx_REG (SFmode, i + 1);
3887 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3888 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3891 RTX_FRAME_RELATED_P (setl) = 1;
3892 RTX_FRAME_RELATED_P (setr) = 1;
3893 vec = gen_rtvec (2, setl, setr);
3895 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3896 gen_rtx_SEQUENCE (VOIDmode, vec),
3900 offset += GET_MODE_SIZE (DFmode);
3907 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3908 Handle case where DISP > 8k by using the add_high_const patterns. */
3911 load_reg (int reg, HOST_WIDE_INT disp, int base)
3913 rtx dest = gen_rtx_REG (word_mode, reg);
3914 rtx basereg = gen_rtx_REG (Pmode, base);
3917 if (VAL_14_BITS_P (disp))
3918 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3919 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3921 rtx delta = GEN_INT (disp);
3922 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3924 emit_move_insn (tmpreg, delta);
3925 if (TARGET_DISABLE_INDEXING)
3927 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3928 src = gen_rtx_MEM (word_mode, tmpreg);
3931 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3935 rtx delta = GEN_INT (disp);
3936 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3937 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3939 emit_move_insn (tmpreg, high);
3940 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3943 emit_move_insn (dest, src);
3946 /* Update the total code bytes output to the text section. */
3949 update_total_code_bytes (int nbytes)
3951 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3952 && !IN_NAMED_SECTION_P (cfun->decl))
3954 if (INSN_ADDRESSES_SET_P ())
3956 unsigned long old_total = total_code_bytes;
3958 total_code_bytes += nbytes;
3960 /* Be prepared to handle overflows. */
3961 if (old_total > total_code_bytes)
3962 total_code_bytes = -1;
3965 total_code_bytes = -1;
3969 /* This function generates the assembly code for function exit.
3970 Args are as for output_function_prologue ().
3972 The function epilogue should not depend on the current stack
3973 pointer! It should use the frame pointer only. This is mandatory
3974 because of alloca; we also take advantage of it to omit stack
3975 adjustments before returning. */
3978 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3980 rtx insn = get_last_insn ();
3984 /* hppa_expand_epilogue does the dirty work now. We just need
3985 to output the assembler directives which denote the end
3988 To make debuggers happy, emit a nop if the epilogue was completely
3989 eliminated due to a volatile call as the last insn in the
3990 current function. That way the return address (in %r2) will
3991 always point to a valid instruction in the current function. */
3993 /* Get the last real insn. */
3994 if (GET_CODE (insn) == NOTE)
3995 insn = prev_real_insn (insn);
3997 /* If it is a sequence, then look inside. */
3998 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3999 insn = XVECEXP (PATTERN (insn), 0, 0);
4001 /* If insn is a CALL_INSN, then it must be a call to a volatile
4002 function (otherwise there would be epilogue insns). */
4003 if (insn && GET_CODE (insn) == CALL_INSN)
4005 fputs ("\tnop\n", file);
4009 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4011 if (TARGET_SOM && TARGET_GAS)
4013 /* We're done with this subspace except possibly for some additional
4014 debug information. Forget that we are in this subspace to ensure
4015 that the next function is output in its own subspace. */
4017 cfun->machine->in_nsubspa = 2;
4020 if (INSN_ADDRESSES_SET_P ())
4022 insn = get_last_nonnote_insn ();
4023 last_address += INSN_ADDRESSES (INSN_UID (insn));
4025 last_address += insn_default_length (insn);
4026 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4027 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4030 /* Finally, update the total number of code bytes output so far. */
4031 update_total_code_bytes (last_address);
4035 hppa_expand_epilogue (void)
4038 HOST_WIDE_INT offset;
4039 HOST_WIDE_INT ret_off = 0;
4041 int merge_sp_adjust_with_load = 0;
4043 /* We will use this often. */
4044 tmpreg = gen_rtx_REG (word_mode, 1);
4046 /* Try to restore RP early to avoid load/use interlocks when
4047 RP gets used in the return (bv) instruction. This appears to still
4048 be necessary even when we schedule the prologue and epilogue. */
4051 ret_off = TARGET_64BIT ? -16 : -20;
4052 if (frame_pointer_needed)
4054 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4059 /* No frame pointer, and stack is smaller than 8k. */
4060 if (VAL_14_BITS_P (ret_off - actual_fsize))
4062 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4068 /* General register restores. */
4069 if (frame_pointer_needed)
4071 offset = local_fsize;
4073 /* If the current function calls __builtin_eh_return, then we need
4074 to restore the saved EH data registers. */
4075 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4077 unsigned int i, regno;
4081 regno = EH_RETURN_DATA_REGNO (i);
4082 if (regno == INVALID_REGNUM)
4085 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4086 offset += UNITS_PER_WORD;
4090 for (i = 18; i >= 4; i--)
4091 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4093 load_reg (i, offset, FRAME_POINTER_REGNUM);
4094 offset += UNITS_PER_WORD;
4099 offset = local_fsize - actual_fsize;
4101 /* If the current function calls __builtin_eh_return, then we need
4102 to restore the saved EH data registers. */
4103 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4105 unsigned int i, regno;
4109 regno = EH_RETURN_DATA_REGNO (i);
4110 if (regno == INVALID_REGNUM)
4113 /* Only for the first load.
4114 merge_sp_adjust_with_load holds the register load
4115 with which we will merge the sp adjustment. */
4116 if (merge_sp_adjust_with_load == 0
4118 && VAL_14_BITS_P (-actual_fsize))
4119 merge_sp_adjust_with_load = regno;
4121 load_reg (regno, offset, STACK_POINTER_REGNUM);
4122 offset += UNITS_PER_WORD;
4126 for (i = 18; i >= 3; i--)
4128 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4130 /* Only for the first load.
4131 merge_sp_adjust_with_load holds the register load
4132 with which we will merge the sp adjustment. */
4133 if (merge_sp_adjust_with_load == 0
4135 && VAL_14_BITS_P (-actual_fsize))
4136 merge_sp_adjust_with_load = i;
4138 load_reg (i, offset, STACK_POINTER_REGNUM);
4139 offset += UNITS_PER_WORD;
4144 /* Align pointer properly (doubleword boundary). */
4145 offset = (offset + 7) & ~7;
4147 /* FP register restores. */
4150 /* Adjust the register to index off of. */
4151 if (frame_pointer_needed)
4152 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4154 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4156 /* Actually do the restores now. */
4157 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4158 if (df_regs_ever_live_p (i)
4159 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4161 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4162 rtx dest = gen_rtx_REG (DFmode, i);
4163 emit_move_insn (dest, src);
4167 /* Emit a blockage insn here to keep these insns from being moved to
4168 an earlier spot in the epilogue, or into the main instruction stream.
4170 This is necessary as we must not cut the stack back before all the
4171 restores are finished. */
4172 emit_insn (gen_blockage ());
4174 /* Reset stack pointer (and possibly frame pointer). The stack
4175 pointer is initially set to fp + 64 to avoid a race condition. */
4176 if (frame_pointer_needed)
4178 rtx delta = GEN_INT (-64);
4180 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4181 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4183 /* If we were deferring a callee register restore, do it now. */
4184 else if (merge_sp_adjust_with_load)
4186 rtx delta = GEN_INT (-actual_fsize);
4187 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4189 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4191 else if (actual_fsize != 0)
4192 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4195 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4196 frame greater than 8k), do so now. */
4198 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4200 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4202 rtx sa = EH_RETURN_STACKADJ_RTX;
4204 emit_insn (gen_blockage ());
4205 emit_insn (TARGET_64BIT
4206 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4207 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
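/* Return the pseudo that holds the value the PIC offset table register
   had on entry to the current function.  */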
4212 hppa_pic_save_rtx (void)
4214 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4217 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4218 #define NO_DEFERRED_PROFILE_COUNTERS 0
4222 /* Vector of funcdef numbers. */
4223 static VEC(int,heap) *funcdef_nos;
4225 /* Output deferred profile counters. */
4227 output_deferred_profile_counters (void)
4232 if (VEC_empty (int, funcdef_nos))
4235 switch_to_section (data_section);
4236 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4237 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4239 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4241 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4242 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4245 VEC_free (int, heap, funcdef_nos);
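/* Emit rtl to call _mcount for profiling.  The begin-prologue address of
   the current function is passed in %r25 and, when deferred counters are
   in use, the address of this function's counter is passed in %r24.
   LABEL_NO is the funcdef number used to name the counter label.  */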
4249 hppa_profile_hook (int label_no)
4251 /* We use SImode for the address of the function in both 32 and
4252 64-bit code to avoid having to provide DImode versions of the
4253 lcla2 and load_offset_label_address insn patterns. */
4254 rtx reg = gen_reg_rtx (SImode);
4255 rtx label_rtx = gen_label_rtx ();
4256 rtx begin_label_rtx, call_insn;
4257 char begin_label_name[16];
4259 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4261 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4264 emit_move_insn (arg_pointer_rtx,
4265 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4268 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4270 /* The address of the function is loaded into %r25 with an instruction-
4271 relative sequence that avoids the use of relocations. The sequence
4272 is split so that the load_offset_label_address instruction can
4273 occupy the delay slot of the call to _mcount. */
4275 emit_insn (gen_lcla2 (reg, label_rtx));
4277 emit_insn (gen_lcla1 (reg, label_rtx));
4279 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4280 reg, begin_label_rtx, label_rtx));
4282 #if !NO_DEFERRED_PROFILE_COUNTERS
4284 rtx count_label_rtx, addr, r24;
4285 char count_label_name[16];
4287 VEC_safe_push (int, heap, funcdef_nos, label_no);
4288 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4289 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4291 addr = force_reg (Pmode, count_label_rtx);
4292 r24 = gen_rtx_REG (Pmode, 24);
4293 emit_move_insn (r24, addr);
4296 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4297 gen_rtx_SYMBOL_REF (Pmode,
4299 GEN_INT (TARGET_64BIT ? 24 : 12)));
4301 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4306 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4307 gen_rtx_SYMBOL_REF (Pmode,
4309 GEN_INT (TARGET_64BIT ? 16 : 8)));
4313 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4314 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4316 /* Indicate the _mcount call cannot throw, nor will it execute a non-local goto. */
4318 REG_NOTES (call_insn)
4319 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4322 /* Fetch the return address for the frame COUNT steps up from
4323 the current frame, after the prologue. FRAMEADDR is the
4324 frame pointer of the COUNT frame.
4326 We want to ignore any export stub remnants here. To handle this,
4327 we examine the code at the return address, and if it is an export
4328 stub, we return a memory rtx for the stub return address stored at frame - 24.
4331 The value returned is used in two different ways:
4333 1. To find a function's caller.
4335 2. To change the return address for a function.
4337 This function handles most instances of case 1; however, it will
4338 fail if there are two levels of stubs to execute on the return
4339 path. The only way I believe that can happen is if the return value
4340 needs a parameter relocation, which never happens for C code.
4342 This function handles most instances of case 2; however, it will
4343 fail if we did not originally have stub code on the return path
4344 but will need stub code on the new return path. This can happen if
4345 the caller & callee are both in the main program, but the new
4346 return location is in a shared library. */
4349 return_addr_rtx (int count, rtx frameaddr)
4359 rp = get_hard_reg_initial_val (Pmode, 2);
4361 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4364 saved_rp = gen_reg_rtx (Pmode);
4365 emit_move_insn (saved_rp, rp);
4367 /* Get pointer to the instruction stream. We have to mask out the
4368 privilege level from the two low order bits of the return address
4369 pointer here so that ins will point to the start of the first
4370 instruction that would have been executed if we returned. */
4371 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4372 label = gen_label_rtx ();
4374 /* Check the instruction stream at the normal return address for the
4377 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4378 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4379 0x00011820 | stub+16: mtsp r1,sr0
4380 0xe0400002 | stub+20: be,n 0(sr0,rp)
4382 If it is an export stub, then our return address is really in
4385 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4386 NULL_RTX, SImode, 1);
4387 emit_jump_insn (gen_bne (label));
4389 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4390 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4391 emit_jump_insn (gen_bne (label));
4393 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4394 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4395 emit_jump_insn (gen_bne (label));
4397 /* 0xe0400002 must be specified as -532676606 so that it won't be
4398 rejected as an invalid immediate operand on 64-bit hosts. */
4399 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4400 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);
4402 /* If there is no export stub then just use the value saved from
4403 the return pointer register. */
4405 emit_jump_insn (gen_bne (label));
4407 /* Here we know that our return address points to an export
4408 stub. We don't want to return the address of the export stub,
4409 but rather the return address of the export stub. That return
4410 address is stored at -24[frameaddr]. */
4412 emit_move_insn (saved_rp,
4414 memory_address (Pmode,
4415 plus_constant (frameaddr,
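/* Emit a conditional branch to OPERAND0 that tests the floating-point
   condition "register" (CCFP register 0) with comparison CODE.  */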
4423 emit_bcond_fp (enum rtx_code code, rtx operand0)
4425 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4426 gen_rtx_IF_THEN_ELSE (VOIDmode,
4427 gen_rtx_fmt_ee (code,
4429 gen_rtx_REG (CCFPmode, 0),
4431 gen_rtx_LABEL_REF (VOIDmode, operand0),
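/* Return rtl that sets the CCFP condition "register" 0 to the result of
   comparing OPERAND0 and OPERAND1 with comparison CODE.  */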
4437 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4439 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4440 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4443 /* Adjust the cost of a scheduling dependency. Return the new cost of
4444 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4447 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4449 enum attr_type attr_type;
4451 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4452 true dependencies as they are described with bypasses now. */
4453 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4456 if (! recog_memoized (insn))
4459 attr_type = get_attr_type (insn);
4461 switch (REG_NOTE_KIND (link))
4464 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4467 if (attr_type == TYPE_FPLOAD)
4469 rtx pat = PATTERN (insn);
4470 rtx dep_pat = PATTERN (dep_insn);
4471 if (GET_CODE (pat) == PARALLEL)
4473 /* This happens for the fldXs,mb patterns. */
4474 pat = XVECEXP (pat, 0, 0);
4476 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4477 /* If this happens, we have to extend this to schedule
4478 optimally. Return 0 for now. */
4481 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4483 if (! recog_memoized (dep_insn))
4485 switch (get_attr_type (dep_insn))
4492 case TYPE_FPSQRTSGL:
4493 case TYPE_FPSQRTDBL:
4494 /* A fpload can't be issued until one cycle before a
4495 preceding arithmetic operation has finished if
4496 the target of the fpload is any of the sources
4497 (or destination) of the arithmetic operation. */
4498 return insn_default_latency (dep_insn) - 1;
4505 else if (attr_type == TYPE_FPALU)
4507 rtx pat = PATTERN (insn);
4508 rtx dep_pat = PATTERN (dep_insn);
4509 if (GET_CODE (pat) == PARALLEL)
4511 /* This happens for the fldXs,mb patterns. */
4512 pat = XVECEXP (pat, 0, 0);
4514 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4515 /* If this happens, we have to extend this to schedule
4516 optimally. Return 0 for now. */
4519 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4521 if (! recog_memoized (dep_insn))
4523 switch (get_attr_type (dep_insn))
4527 case TYPE_FPSQRTSGL:
4528 case TYPE_FPSQRTDBL:
4529 /* An ALU flop can't be issued until two cycles before a
4530 preceding divide or sqrt operation has finished if
4531 the target of the ALU flop is any of the sources
4532 (or destination) of the divide or sqrt operation. */
4533 return insn_default_latency (dep_insn) - 2;
4541 /* For other anti dependencies, the cost is 0. */
4544 case REG_DEP_OUTPUT:
4545 /* Output dependency; DEP_INSN writes a register that INSN writes some
4547 if (attr_type == TYPE_FPLOAD)
4549 rtx pat = PATTERN (insn);
4550 rtx dep_pat = PATTERN (dep_insn);
4551 if (GET_CODE (pat) == PARALLEL)
4553 /* This happens for the fldXs,mb patterns. */
4554 pat = XVECEXP (pat, 0, 0);
4556 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4557 /* If this happens, we have to extend this to schedule
4558 optimally. Return 0 for now. */
4561 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4563 if (! recog_memoized (dep_insn))
4565 switch (get_attr_type (dep_insn))
4572 case TYPE_FPSQRTSGL:
4573 case TYPE_FPSQRTDBL:
4574 /* A fpload can't be issued until one cycle before a
4575 preceding arithmetic operation has finished if
4576 the target of the fpload is the destination of the
4577 arithmetic operation.
4579 Exception: For PA7100LC, PA7200 and PA7300, the cost
4580 is 3 cycles, unless they bundle together. We also
4581 pay the penalty if the second insn is a fpload. */
4582 return insn_default_latency (dep_insn) - 1;
4589 else if (attr_type == TYPE_FPALU)
4591 rtx pat = PATTERN (insn);
4592 rtx dep_pat = PATTERN (dep_insn);
4593 if (GET_CODE (pat) == PARALLEL)
4595 /* This happens for the fldXs,mb patterns. */
4596 pat = XVECEXP (pat, 0, 0);
4598 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4599 /* If this happens, we have to extend this to schedule
4600 optimally. Return 0 for now. */
4603 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4605 if (! recog_memoized (dep_insn))
4607 switch (get_attr_type (dep_insn))
4611 case TYPE_FPSQRTSGL:
4612 case TYPE_FPSQRTDBL:
4613 /* An ALU flop can't be issued until two cycles before a
4614 preceding divide or sqrt operation has finished if
4615 the target of the ALU flop is also the target of
4616 the divide or sqrt operation. */
4617 return insn_default_latency (dep_insn) - 2;
4625 /* For other output dependencies, the cost is 0. */
4633 /* Adjust scheduling priorities. We use this to try and keep addil
4634 and the next use of %r1 close together. */
4636 pa_adjust_priority (rtx insn, int priority)
4638 rtx set = single_set (insn);
4642 src = SET_SRC (set);
4643 dest = SET_DEST (set);
4644 if (GET_CODE (src) == LO_SUM
4645 && symbolic_operand (XEXP (src, 1), VOIDmode)
4646 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4649 else if (GET_CODE (src) == MEM
4650 && GET_CODE (XEXP (src, 0)) == LO_SUM
4651 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4652 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4655 else if (GET_CODE (dest) == MEM
4656 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4657 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4658 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4664 /* The 700 can only issue a single insn at a time.
4665 The 7XXX processors can issue two insns at a time.
4666 The 8000 can issue 4 insns at a time. */
4668 pa_issue_rate (void)
4672 case PROCESSOR_700: return 1;
4673 case PROCESSOR_7100: return 2;
4674 case PROCESSOR_7100LC: return 2;
4675 case PROCESSOR_7200: return 2;
4676 case PROCESSOR_7300: return 2;
4677 case PROCESSOR_8000: return 4;
4686 /* Return any length adjustment needed by INSN which already has its length
4687 computed as LENGTH. Return zero if no adjustment is necessary.
4689 For the PA: function calls, millicode calls, and backwards short
4690 conditional branches with unfilled delay slots need an adjustment by +1
4691 (to account for the NOP which will be inserted into the instruction stream).
4693 Also compute the length of an inline block move here as it is too
4694 complicated to express as a length attribute in pa.md. */
4696 pa_adjust_insn_length (rtx insn, int length)
4698 rtx pat = PATTERN (insn);
4700 /* Jumps inside switch tables which have unfilled delay slots need
4702 if (GET_CODE (insn) == JUMP_INSN
4703 && GET_CODE (pat) == PARALLEL
4704 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4706 /* Millicode insn with an unfilled delay slot. */
4707 else if (GET_CODE (insn) == INSN
4708 && GET_CODE (pat) != SEQUENCE
4709 && GET_CODE (pat) != USE
4710 && GET_CODE (pat) != CLOBBER
4711 && get_attr_type (insn) == TYPE_MILLI)
4713 /* Block move pattern. */
4714 else if (GET_CODE (insn) == INSN
4715 && GET_CODE (pat) == PARALLEL
4716 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4717 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4718 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4719 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4720 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4721 return compute_movmem_length (insn) - 4;
4722 /* Block clear pattern. */
4723 else if (GET_CODE (insn) == INSN
4724 && GET_CODE (pat) == PARALLEL
4725 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4726 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4727 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4728 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4729 return compute_clrmem_length (insn) - 4;
4730 /* Conditional branch with an unfilled delay slot. */
4731 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4733 /* Adjust a short backwards conditional with an unfilled delay slot. */
4734 if (GET_CODE (pat) == SET
4736 && ! forward_branch_p (insn))
4738 else if (GET_CODE (pat) == PARALLEL
4739 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4742 /* Adjust dbra insn with short backwards conditional branch with
4743 unfilled delay slot -- only for case where counter is in a
4744 general register. */
4745 else if (GET_CODE (pat) == PARALLEL
4746 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4747 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4748 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4750 && ! forward_branch_p (insn))
4758 /* Print operand X (an rtx) in assembler syntax to file FILE.
4759 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4760 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4763 print_operand (FILE *file, rtx x, int code)
4768 /* Output a 'nop' if there's nothing for the delay slot. */
4769 if (dbr_sequence_length () == 0)
4770 fputs ("\n\tnop", file);
4773 /* Output a nullification completer if there's nothing for the
4774 delay slot or nullification is requested. */
4775 if (dbr_sequence_length () == 0 ||
4777 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4781 /* Print out the second register name of a register pair.
4782 I.e., R (6) => 7. */
4783 fputs (reg_names[REGNO (x) + 1], file);
4786 /* A register or zero. */
4788 || (x == CONST0_RTX (DFmode))
4789 || (x == CONST0_RTX (SFmode)))
4791 fputs ("%r0", file);
4797 /* A register or zero (floating point). */
4799 || (x == CONST0_RTX (DFmode))
4800 || (x == CONST0_RTX (SFmode)))
4802 fputs ("%fr0", file);
4811 xoperands[0] = XEXP (XEXP (x, 0), 0);
4812 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4813 output_global_address (file, xoperands[1], 0);
4814 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4818 case 'C': /* Plain (C)ondition */
4820 switch (GET_CODE (x))
4823 fputs ("=", file); break;
4825 fputs ("<>", file); break;
4827 fputs (">", file); break;
4829 fputs (">=", file); break;
4831 fputs (">>=", file); break;
4833 fputs (">>", file); break;
4835 fputs ("<", file); break;
4837 fputs ("<=", file); break;
4839 fputs ("<<=", file); break;
4841 fputs ("<<", file); break;
4846 case 'N': /* Condition, (N)egated */
4847 switch (GET_CODE (x))
4850 fputs ("<>", file); break;
4852 fputs ("=", file); break;
4854 fputs ("<=", file); break;
4856 fputs ("<", file); break;
4858 fputs ("<<", file); break;
4860 fputs ("<<=", file); break;
4862 fputs (">=", file); break;
4864 fputs (">", file); break;
4866 fputs (">>", file); break;
4868 fputs (">>=", file); break;
4873 /* For floating point comparisons. Note that the output
4874 predicates are the complement of the desired mode. The
4875 conditions for GT, GE, LT, LE and LTGT cause an invalid
4876 operation exception if the result is unordered and this
4877 exception is enabled in the floating-point status register. */
4879 switch (GET_CODE (x))
4882 fputs ("!=", file); break;
4884 fputs ("=", file); break;
4886 fputs ("!>", file); break;
4888 fputs ("!>=", file); break;
4890 fputs ("!<", file); break;
4892 fputs ("!<=", file); break;
4894 fputs ("!<>", file); break;
4896 fputs ("!?<=", file); break;
4898 fputs ("!?<", file); break;
4900 fputs ("!?>=", file); break;
4902 fputs ("!?>", file); break;
4904 fputs ("!?=", file); break;
4906 fputs ("!?", file); break;
4908 fputs ("?", file); break;
4913 case 'S': /* Condition, operands are (S)wapped. */
4914 switch (GET_CODE (x))
4917 fputs ("=", file); break;
4919 fputs ("<>", file); break;
4921 fputs ("<", file); break;
4923 fputs ("<=", file); break;
4925 fputs ("<<=", file); break;
4927 fputs ("<<", file); break;
4929 fputs (">", file); break;
4931 fputs (">=", file); break;
4933 fputs (">>=", file); break;
4935 fputs (">>", file); break;
4940 case 'B': /* Condition, (B)oth swapped and negate. */
4941 switch (GET_CODE (x))
4944 fputs ("<>", file); break;
4946 fputs ("=", file); break;
4948 fputs (">=", file); break;
4950 fputs (">", file); break;
4952 fputs (">>", file); break;
4954 fputs (">>=", file); break;
4956 fputs ("<=", file); break;
4958 fputs ("<", file); break;
4960 fputs ("<<", file); break;
4962 fputs ("<<=", file); break;
4968 gcc_assert (GET_CODE (x) == CONST_INT);
4969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4972 gcc_assert (GET_CODE (x) == CONST_INT);
4973 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4976 gcc_assert (GET_CODE (x) == CONST_INT);
4977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4980 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4981 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4984 gcc_assert (GET_CODE (x) == CONST_INT);
4985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4988 gcc_assert (GET_CODE (x) == CONST_INT);
4989 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4992 if (GET_CODE (x) == CONST_INT)
4997 switch (GET_CODE (XEXP (x, 0)))
5001 if (ASSEMBLER_DIALECT == 0)
5002 fputs ("s,mb", file);
5004 fputs (",mb", file);
5008 if (ASSEMBLER_DIALECT == 0)
5009 fputs ("s,ma", file);
5011 fputs (",ma", file);
5014 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5017 if (ASSEMBLER_DIALECT == 0)
5020 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5021 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5023 if (ASSEMBLER_DIALECT == 0)
5024 fputs ("x,s", file);
5028 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5032 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5038 output_global_address (file, x, 0);
5041 output_global_address (file, x, 1);
5043 case 0: /* Don't do anything special */
5048 compute_zdepwi_operands (INTVAL (x), op);
5049 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5055 compute_zdepdi_operands (INTVAL (x), op);
5056 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5060 /* We can get here from a .vtable_inherit due to our
5061 CONSTANT_ADDRESS_P rejecting perfectly good constant addresses. */
5067 if (GET_CODE (x) == REG)
5069 fputs (reg_names [REGNO (x)], file);
5070 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5076 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5077 && (REGNO (x) & 1) == 0)
5080 else if (GET_CODE (x) == MEM)
5082 int size = GET_MODE_SIZE (GET_MODE (x));
5083 rtx base = NULL_RTX;
5084 switch (GET_CODE (XEXP (x, 0)))
5088 base = XEXP (XEXP (x, 0), 0);
5089 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5093 base = XEXP (XEXP (x, 0), 0);
5094 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5097 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5098 fprintf (file, "%s(%s)",
5099 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5100 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5101 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5102 fprintf (file, "%s(%s)",
5103 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5104 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5105 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5106 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5108 /* Because the REG_POINTER flag can get lost during reload,
5109 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5110 index and base registers in the combined move patterns. */
5111 rtx base = XEXP (XEXP (x, 0), 1);
5112 rtx index = XEXP (XEXP (x, 0), 0);
5114 fprintf (file, "%s(%s)",
5115 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5118 output_address (XEXP (x, 0));
5121 output_address (XEXP (x, 0));
5126 output_addr_const (file, x);
5129 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5132 output_global_address (FILE *file, rtx x, int round_constant)
5135 /* Imagine (high (const (plus ...))). */
5136 if (GET_CODE (x) == HIGH)
5139 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5140 output_addr_const (file, x);
5141 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5143 output_addr_const (file, x);
5144 fputs ("-$global$", file);
5146 else if (GET_CODE (x) == CONST)
5148 const char *sep = "";
5149 int offset = 0; /* assembler wants -$global$ at end */
5150 rtx base = NULL_RTX;
5152 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5155 base = XEXP (XEXP (x, 0), 0);
5156 output_addr_const (file, base);
5159 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5165 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5168 base = XEXP (XEXP (x, 0), 1);
5169 output_addr_const (file, base);
5172 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5178 /* How bogus. The compiler is apparently responsible for
5179 rounding the constant if it uses an LR field selector.
5181 The linker and/or assembler seem a better place since
5182 they have to do this kind of thing already.
5184 If we fail to do this, HP's optimizing linker may eliminate
5185 an addil, but not update the ldw/stw/ldo instruction that
5186 uses the result of the addil. */
5188 offset = ((offset + 0x1000) & ~0x1fff);
5190 switch (GET_CODE (XEXP (x, 0)))
5203 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5211 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5212 fputs ("-$global$", file);
5214 fprintf (file, "%s%d", sep, offset);
5217 output_addr_const (file, x);
5220 /* Output boilerplate text to appear at the beginning of the file.
5221 There are several possible versions. */
5222 #define aputs(x) fputs(x, asm_out_file)
5224 pa_file_start_level (void)
5227 aputs ("\t.LEVEL 2.0w\n");
5228 else if (TARGET_PA_20)
5229 aputs ("\t.LEVEL 2.0\n");
5230 else if (TARGET_PA_11)
5231 aputs ("\t.LEVEL 1.1\n");
5233 aputs ("\t.LEVEL 1.0\n");
5237 pa_file_start_space (int sortspace)
5239 aputs ("\t.SPACE $PRIVATE$");
5242 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5243 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5244 "\n\t.SPACE $TEXT$");
5247 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5248 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5252 pa_file_start_file (int want_version)
5254 if (write_symbols != NO_DEBUG)
5256 output_file_directive (asm_out_file, main_input_filename);
5258 aputs ("\t.version\t\"01.01\"\n");
5263 pa_file_start_mcount (const char *aswhat)
5266 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5270 pa_elf_file_start (void)
5272 pa_file_start_level ();
5273 pa_file_start_mcount ("ENTRY");
5274 pa_file_start_file (0);
5278 pa_som_file_start (void)
5280 pa_file_start_level ();
5281 pa_file_start_space (0);
5282 aputs ("\t.IMPORT $global$,DATA\n"
5283 "\t.IMPORT $$dyncall,MILLICODE\n");
5284 pa_file_start_mcount ("CODE");
5285 pa_file_start_file (0);
5289 pa_linux_file_start (void)
5291 pa_file_start_file (1);
5292 pa_file_start_level ();
5293 pa_file_start_mcount ("CODE");
5297 pa_hpux64_gas_file_start (void)
5299 pa_file_start_level ();
5300 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5302 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5304 pa_file_start_file (1);
5308 pa_hpux64_hpas_file_start (void)
5310 pa_file_start_level ();
5311 pa_file_start_space (1);
5312 pa_file_start_mcount ("CODE");
5313 pa_file_start_file (0);
5317 /* Search the deferred plabel list for SYMBOL and return its internal
5318 label. If an entry for SYMBOL is not found, a new entry is created. */
5321 get_deferred_plabel (rtx symbol)
5323 const char *fname = XSTR (symbol, 0);
5326 /* See if we have already put this function on the list of deferred
5327 plabels. This list is generally small, so a linear search is not
5328 too ugly. If it proves too slow, replace it with something faster. */
5329 for (i = 0; i < n_deferred_plabels; i++)
5330 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5333 /* If the deferred plabel list is empty, or this entry was not found
5334 on the list, create a new entry on the list. */
5335 if (deferred_plabels == NULL || i == n_deferred_plabels)
5339 if (deferred_plabels == 0)
5340 deferred_plabels = (struct deferred_plabel *)
5341 ggc_alloc (sizeof (struct deferred_plabel));
5343 deferred_plabels = (struct deferred_plabel *)
5344 ggc_realloc (deferred_plabels,
5345 ((n_deferred_plabels + 1)
5346 * sizeof (struct deferred_plabel)));
5348 i = n_deferred_plabels++;
5349 deferred_plabels[i].internal_label = gen_label_rtx ();
5350 deferred_plabels[i].symbol = symbol;
5352 /* Gross. We have just implicitly taken the address of this
5353 function. Mark it in the same manner as assemble_name. */
5354 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5356 mark_referenced (id);
5359 return deferred_plabels[i].internal_label;
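/* Output the plabel entries deferred by get_deferred_plabel: for each one,
   an internal label followed by a pointer-sized reference to the function
   symbol.  */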
5363 output_deferred_plabels (void)
5367 /* If we have some deferred plabels, then we need to switch into the
5368 data or readonly data section, and align it to a 4 byte boundary
5369 before outputting the deferred plabels. */
5370 if (n_deferred_plabels)
5372 switch_to_section (flag_pic ? data_section : readonly_data_section);
5373 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5376 /* Now output the deferred plabels. */
5377 for (i = 0; i < n_deferred_plabels; i++)
5379 targetm.asm_out.internal_label (asm_out_file, "L",
5380 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5381 assemble_integer (deferred_plabels[i].symbol,
5382 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5386 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5387 /* Initialize optabs to point to HPUX long double emulation routines. */
5389 pa_hpux_init_libfuncs (void)
5391 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5392 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5393 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5394 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5395 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5396 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5397 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5398 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5399 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5401 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5402 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5403 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5404 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5405 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5406 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5407 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5409 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5410 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5411 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5412 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5414 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5415 ? "__U_Qfcnvfxt_quad_to_sgl"
5416 : "_U_Qfcnvfxt_quad_to_sgl");
5417 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5418 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5419 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5421 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5422 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5423 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5424 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5428 /* HP's millicode routines mean something special to the assembler.
5429 Keep track of which ones we have used. */
5431 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5432 static void import_milli (enum millicodes);
5433 static char imported[(int) end1000];
5434 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5435 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5436 #define MILLI_START 10
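/* Output an import directive, e.g. ".IMPORT $$mulI,MILLICODE", the first
   time millicode routine CODE is used.  */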
5439 import_milli (enum millicodes code)
5441 char str[sizeof (import_string)];
5443 if (!imported[(int) code])
5445 imported[(int) code] = 1;
5446 strcpy (str, import_string);
5447 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5448 output_asm_insn (str, 0);
5452 /* The register constraints have put the operands and return value in
5453 the proper registers. */
5456 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5458 import_milli (mulI);
5459 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5462 /* Emit the rtl for doing a division by a constant. */
5464 /* Do magic division millicodes exist for this value? */
5465 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5467 /* We'll use an array to keep track of the magic millicodes and
5468 whether or not we've used them already. [n][0] is signed, [n][1] is
5471 static int div_milli[16][2];
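/* Emit rtl for a division by a small constant when a magic millicode
   routine exists for the divisor in operands[2].  The dividend is passed
   in %r26 and the result is returned in %r29.  */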
5474 emit_hpdiv_const (rtx *operands, int unsignedp)
5476 if (GET_CODE (operands[2]) == CONST_INT
5477 && INTVAL (operands[2]) > 0
5478 && INTVAL (operands[2]) < 16
5479 && magic_milli[INTVAL (operands[2])])
5481 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5483 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5487 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5488 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5490 gen_rtx_REG (SImode, 26),
5492 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5493 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5494 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5495 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5496 gen_rtx_CLOBBER (VOIDmode, ret))));
5497 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
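/* Output a millicode call for a division.  A divisor with a magic
   millicode routine uses $$divI_<n> or $$divU_<n>; any other divisor
   uses the generic $$divI or $$divU routine.  */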
5504 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5508 /* If the divisor is a constant, try to use one of the special
5510 if (GET_CODE (operands[0]) == CONST_INT)
5512 static char buf[100];
5513 divisor = INTVAL (operands[0]);
5514 if (!div_milli[divisor][unsignedp])
5516 div_milli[divisor][unsignedp] = 1;
5518 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5520 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5524 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5525 INTVAL (operands[0]));
5526 return output_millicode_call (insn,
5527 gen_rtx_SYMBOL_REF (SImode, buf));
5531 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5532 INTVAL (operands[0]));
5533 return output_millicode_call (insn,
5534 gen_rtx_SYMBOL_REF (SImode, buf));
5537 /* Divisor isn't a special constant. */
5542 import_milli (divU);
5543 return output_millicode_call (insn,
5544 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5548 import_milli (divI);
5549 return output_millicode_call (insn,
5550 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5555 /* Output a $$rem millicode to do mod. */
5558 output_mod_insn (int unsignedp, rtx insn)
5562 import_milli (remU);
5563 return output_millicode_call (insn,
5564 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5568 import_milli (remI);
5569 return output_millicode_call (insn,
5570 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
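/* Output the .CALL directive describing which registers hold the
   arguments of CALL_INSN (e.g. ".CALL ARGW0=GR,ARGW1=FR"), so that the
   linker can insert parameter relocation stubs when needed in the
   32-bit SOM environment.  */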
5575 output_arg_descriptor (rtx call_insn)
5577 const char *arg_regs[4];
5578 enum machine_mode arg_mode;
5580 int i, output_flag = 0;
5583 /* We neither need nor want argument location descriptors for the
5584 64-bit runtime environment or the ELF32 environment. */
5585 if (TARGET_64BIT || TARGET_ELF32)
5588 for (i = 0; i < 4; i++)
5591 /* Specify explicitly that no argument relocations should take place
5592 if using the portable runtime calling conventions. */
5593 if (TARGET_PORTABLE_RUNTIME)
5595 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5600 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5601 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5602 link; link = XEXP (link, 1))
5604 rtx use = XEXP (link, 0);
5606 if (! (GET_CODE (use) == USE
5607 && GET_CODE (XEXP (use, 0)) == REG
5608 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5611 arg_mode = GET_MODE (XEXP (use, 0));
5612 regno = REGNO (XEXP (use, 0));
5613 if (regno >= 23 && regno <= 26)
5615 arg_regs[26 - regno] = "GR";
5616 if (arg_mode == DImode)
5617 arg_regs[25 - regno] = "GR";
5619 else if (regno >= 32 && regno <= 39)
5621 if (arg_mode == SFmode)
5622 arg_regs[(regno - 32) / 2] = "FR";
5625 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5626 arg_regs[(regno - 34) / 2] = "FR";
5627 arg_regs[(regno - 34) / 2 + 1] = "FU";
5629 arg_regs[(regno - 34) / 2] = "FU";
5630 arg_regs[(regno - 34) / 2 + 1] = "FR";
5635 fputs ("\t.CALL ", asm_out_file);
5636 for (i = 0; i < 4; i++)
5641 fputc (',', asm_out_file);
5642 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5645 fputc ('\n', asm_out_file);
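/* Implement TARGET_SECONDARY_RELOAD.  Return the register class needed
   as an intermediate when copying X of MODE to or from a register in
   CLASS, or record in SRI the icode of a reload pattern that performs
   the copy directly.  */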
5648 static enum reg_class
5649 pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
5650 enum machine_mode mode, secondary_reload_info *sri)
5652 int is_symbolic, regno;
5654 /* Handle the easy stuff first. */
5655 if (class == R1_REGS)
5661 if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5667 /* If we have something like (mem (mem (...)), we can safely assume the
5668 inner MEM will end up in a general register after reloading, so there's
5669 no need for a secondary reload. */
5670 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5673 /* Trying to load a constant into a FP register during PIC code
5674 generation requires %r1 as a scratch register. */
5676 && (mode == SImode || mode == DImode)
5677 && FP_REG_CLASS_P (class)
5678 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5680 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5681 : CODE_FOR_reload_indi_r1);
5685 /* Profiling showed the PA port spends about 1.3% of its compilation
5686 time in true_regnum from calls inside pa_secondary_reload_class. */
5687 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5688 regno = true_regnum (x);
5690 /* In order to allow 14-bit displacements in integer loads and stores,
5691 we need to prevent reload from generating out of range integer mode
5692 loads and stores to the floating point registers. Previously, we
5693 used to call for a secondary reload and have emit_move_sequence()
5694 fix the instruction sequence. However, reload occasionally wouldn't
5695 generate the reload and we would end up with an invalid REG+D memory
5696 address. So, now we use an intermediate general register for most
5697 memory loads and stores. */
5698 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5699 && GET_MODE_CLASS (mode) == MODE_INT
5700 && FP_REG_CLASS_P (class))
5702 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5703 the secondary reload needed for a pseudo. It never passes a
5705 if (GET_CODE (x) == MEM)
5709 /* We don't need an intermediate for indexed and LO_SUM DLT
5710 memory addresses. When INT14_OK_STRICT is true, it might
5711 appear that we could directly allow register indirect
5712 memory addresses. However, this doesn't work because we
5713 don't support SUBREGs in floating-point register copies
5714 and reload doesn't tell us when it's going to use a SUBREG. */
5715 if (IS_INDEX_ADDR_P (x)
5716 || IS_LO_SUM_DLT_ADDR_P (x))
5719 /* Otherwise, we need an intermediate general register. */
5720 return GENERAL_REGS;
5723 /* Request a secondary reload with a general scratch register
5724 for everything else. ??? Could symbolic operands be handled
5725 directly when generating non-pic PA 2.0 code? */
5726 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5730 /* We need a secondary register (GPR) for copies between the SAR
5731 and anything other than a general register. */
5732 if (class == SHIFT_REGS && (regno <= 0 || regno >= 32))
5734 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5738 /* A SAR<->FP register copy requires a secondary register (GPR) as
5739 well as secondary memory. */
5740 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5741 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5742 && FP_REG_CLASS_P (class)))
5744 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5748 /* Secondary reloads of symbolic operands require %r1 as a scratch
5749 register when we're generating PIC code and when the operand isn't
5751 if (GET_CODE (x) == HIGH)
5754 /* Profiling has shown that GCC spends about 2.6% of its compilation
5755 time in symbolic_operand from calls inside pa_secondary_reload_class.
5756 So, we use an inline copy to avoid useless work. */
5757 switch (GET_CODE (x))
5762 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5769 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5770 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5771 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5772 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5779 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5781 gcc_assert (mode == SImode || mode == DImode);
5782 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5783 : CODE_FOR_reload_indi_r1);
5789 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5790 is only marked as live on entry by df-scan when it is a fixed
5791 register. It isn't a fixed register in the 64-bit runtime,
5792 so we need to mark it here. */
5795 pa_extra_live_on_entry (bitmap regs)
5798 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5801 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5802 to prevent it from being deleted. */
5805 pa_eh_return_handler_rtx (void)
5809 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5810 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5811 tmp = gen_rtx_MEM (word_mode, tmp);
5816 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5817 by invisible reference. As a GCC extension, we also pass anything
5818 with a zero or variable size by reference.
5820 The 64-bit runtime does not describe passing any types by invisible
5821 reference. The internals of GCC can't currently handle passing
5822 empty structures, and zero or variable length arrays when they are
5823 not passed entirely on the stack or by reference. Thus, as a GCC
5824 extension, we pass these types by reference. The HP compiler doesn't
5825 support these types, so hopefully there shouldn't be any compatibility
5826 issues. This may have to be revisited when HP releases a C99 compiler
5827 or updates the ABI. */
5830 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5831 enum machine_mode mode, const_tree type,
5832 bool named ATTRIBUTE_UNUSED)
5837 size = int_size_in_bytes (type);
5839 size = GET_MODE_SIZE (mode);
5844 return size <= 0 || size > 8;
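/* Return the padding direction (upward, downward or none) for an
   argument of MODE and TYPE; used by FUNCTION_ARG_PADDING.  */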
5848 function_arg_padding (enum machine_mode mode, const_tree type)
5851 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5853 /* Return none if justification is not required. */
5855 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5856 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5859 /* The directions set here are ignored when a BLKmode argument larger
5860 than a word is placed in a register. Different code is used for
5861 the stack and registers. This makes it difficult to have a
5862 consistent data representation for both the stack and registers.
5863 For both runtimes, the justification and padding for arguments on
5864 the stack and in registers should be identical. */
5866 /* The 64-bit runtime specifies left justification for aggregates. */
5869 /* The 32-bit runtime architecture specifies right justification.
5870 When the argument is passed on the stack, the argument is padded
5871 with garbage on the left. The HP compiler pads with zeros. */
5875 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5882 /* Do what is necessary for `va_start'. We look at the current function
5883 to determine if stdargs or varargs is used and fill in an initial
5884 va_list. A pointer to this constructor is returned. */
5887 hppa_builtin_saveregs (void)
5890 tree fntype = TREE_TYPE (current_function_decl);
5891 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5892 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5893 != void_type_node)))
5894 ? UNITS_PER_WORD : 0);
5897 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5899 offset = current_function_arg_offset_rtx;
5905 /* Adjust for varargs/stdarg differences. */
5907 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5909 offset = current_function_arg_offset_rtx;
5911 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5912 from the incoming arg pointer and growing to larger addresses. */
5913 for (i = 26, off = -64; i >= 19; i--, off += 8)
5914 emit_move_insn (gen_rtx_MEM (word_mode,
5915 plus_constant (arg_pointer_rtx, off)),
5916 gen_rtx_REG (word_mode, i));
5918 /* The incoming args pointer points just beyond the flushback area;
5919 normally this is not a serious concern. However, when we are doing
5920 varargs/stdargs we want to make the arg pointer point to the start
5921 of the incoming argument area. */
5922 emit_move_insn (virtual_incoming_args_rtx,
5923 plus_constant (arg_pointer_rtx, -64));
5925 /* Now return a pointer to the first anonymous argument. */
5926 return copy_to_reg (expand_binop (Pmode, add_optab,
5927 virtual_incoming_args_rtx,
5928 offset, 0, 0, OPTAB_LIB_WIDEN));
5931 /* Store general registers on the stack. */
5932 dest = gen_rtx_MEM (BLKmode,
5933 plus_constant (current_function_internal_arg_pointer,
5935 set_mem_alias_set (dest, get_varargs_alias_set ());
5936 set_mem_align (dest, BITS_PER_WORD);
5937 move_block_from_reg (23, dest, 4);
5939 /* move_block_from_reg will emit code to store the argument registers
5940 individually as scalar stores.
5942 However, other insns may later load from the same addresses for
5943 a structure load (passing a struct to a varargs routine).
5945 The alias code assumes that such aliasing can never happen, so we
5946 have to keep memory referencing insns from moving up beyond the
5947 last argument register store. So we emit a blockage insn here. */
5948 emit_insn (gen_blockage ());
5950 return copy_to_reg (expand_binop (Pmode, add_optab,
5951 current_function_internal_arg_pointer,
5952 offset, 0, 0, OPTAB_LIB_WIDEN));
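/* Implement va_start: flush the anonymous register arguments to the
   stack via expand_builtin_saveregs and let the generic code initialize
   VALIST.  */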
5956 hppa_va_start (tree valist, rtx nextarg)
5958 nextarg = expand_builtin_saveregs ();
5959 std_expand_builtin_va_start (valist, nextarg);
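/* Gimplify va_arg for TYPE.  The 64-bit runtime grows arguments upward
   and can use the generic code; the 32-bit runtime grows them downward,
   so the address is computed by hand, and arguments passed by invisible
   reference are dereferenced.  */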
5963 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5967 /* Args grow upward. We can use the generic routines. */
5968 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5970 else /* !TARGET_64BIT */
5972 tree ptr = build_pointer_type (type);
5975 unsigned int size, ofs;
5978 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5982 ptr = build_pointer_type (type);
5984 size = int_size_in_bytes (type);
5985 valist_type = TREE_TYPE (valist);
5987 /* Args grow down. Not handled by generic routines. */
5989 u = fold_convert (sizetype, size_in_bytes (type));
5990 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5991 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
5993 /* Copied from va-pa.h, but we probably don't need to align to
5994 word size, since we generate and preserve that invariant. */
5995 u = size_int (size > 4 ? -8 : -4);
5996 t = fold_convert (sizetype, t);
5997 t = build2 (BIT_AND_EXPR, sizetype, t, u);
5998 t = fold_convert (valist_type, t);
6000 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6002 ofs = (8 - size) % 4;
6006 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6009 t = fold_convert (ptr, t);
6010 t = build_va_arg_indirect_ref (t);
6013 t = build_va_arg_indirect_ref (t);
6019 /* True if MODE is valid for the target. By "valid", we mean able to
6020 be manipulated in non-trivial ways. In particular, this means all
6021 the arithmetic is supported.
6023 Currently, TImode is not valid as the HP 64-bit runtime documentation
6024 doesn't document the alignment and calling conventions for this type.
6025 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6026 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6029 pa_scalar_mode_supported_p (enum machine_mode mode)
6031 int precision = GET_MODE_PRECISION (mode);
6033 switch (GET_MODE_CLASS (mode))
6035 case MODE_PARTIAL_INT:
6037 if (precision == CHAR_TYPE_SIZE)
6039 if (precision == SHORT_TYPE_SIZE)
6041 if (precision == INT_TYPE_SIZE)
6043 if (precision == LONG_TYPE_SIZE)
6045 if (precision == LONG_LONG_TYPE_SIZE)
6050 if (precision == FLOAT_TYPE_SIZE)
6052 if (precision == DOUBLE_TYPE_SIZE)
6054 if (precision == LONG_DOUBLE_TYPE_SIZE)
6058 case MODE_DECIMAL_FLOAT:
6066 /* This routine handles all the normal conditional branch sequences we
6067 might need to generate. It handles compare immediate vs compare
6068 register, nullification of delay slots, varying length branches,
6069 negated branches, and all combinations of the above. It returns the
6070 output appropriate to emit the branch corresponding to all given
6074 output_cbranch (rtx *operands, int negated, rtx insn)
6076 static char buf[100];
6078 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6079 int length = get_attr_length (insn);
6082 /* A conditional branch to the following instruction (e.g. the delay slot)
6083 is asking for a disaster. This can happen when not optimizing and
6084 when jump optimization fails.
6086 While it is usually safe to emit nothing, this can fail if the
6087 preceding instruction is a nullified branch with an empty delay
6088 slot and the same branch target as this branch. We could check
6089 for this but jump optimization should eliminate nop jumps. It
6090 is always safe to emit a nop. */
6091 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6094 /* The doubleword form of the cmpib instruction doesn't have the LEU
6095 and GTU conditions while the cmpb instruction does. Since we accept
6096 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6097 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6098 operands[2] = gen_rtx_REG (DImode, 0);
6099 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6100 operands[1] = gen_rtx_REG (DImode, 0);
6102 /* If this is a long branch with its delay slot unfilled, set `nullify'
6103 as it can nullify the delay slot and save a nop. */
6104 if (length == 8 && dbr_sequence_length () == 0)
6107 /* If this is a short forward conditional branch which did not get
6108 its delay slot filled, the delay slot can still be nullified. */
6109 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6110 nullify = forward_branch_p (insn);
6112 /* A forward branch over a single nullified insn can be done with a
6113 comclr instruction. This avoids a single cycle penalty due to
6114 mis-predicted branch if we fall through (branch not taken). */
6116 && next_real_insn (insn) != 0
6117 && get_attr_length (next_real_insn (insn)) == 4
6118 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6124 /* All short conditional branches except backwards with an unfilled
6128 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6130 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6131 if (GET_MODE (operands[1]) == DImode)
6134 strcat (buf, "%B3");
6136 strcat (buf, "%S3");
6138 strcat (buf, " %2,%r1,%%r0");
6140 strcat (buf, ",n %2,%r1,%0");
6142 strcat (buf, " %2,%r1,%0");
6145 /* All long conditionals. Note a short backward branch with an
6146 unfilled delay slot is treated just like a long backward branch
6147 with an unfilled delay slot. */
6149 /* Handle weird backwards branch with a filled delay slot
6150 which is nullified. */
6151 if (dbr_sequence_length () != 0
6152 && ! forward_branch_p (insn)
6155 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6156 if (GET_MODE (operands[1]) == DImode)
6159 strcat (buf, "%S3");
6161 strcat (buf, "%B3");
6162 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6164 /* Handle short backwards branch with an unfilled delay slot.
6165 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6166 taken and untaken branches. */
6167 else if (dbr_sequence_length () == 0
6168 && ! forward_branch_p (insn)
6169 && INSN_ADDRESSES_SET_P ()
6170 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6171 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6173 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6174 if (GET_MODE (operands[1]) == DImode)
6177 strcat (buf, "%B3 %2,%r1,%0%#");
6179 strcat (buf, "%S3 %2,%r1,%0%#");
6183 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6184 if (GET_MODE (operands[1]) == DImode)
6187 strcat (buf, "%S3");
6189 strcat (buf, "%B3");
6191 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6193 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6198 /* The reversed conditional branch must branch over one additional
6199 instruction if the delay slot is filled and needs to be extracted
6200 by output_lbranch. If the delay slot is empty or this is a
6201 nullified forward branch, the instruction after the reversed
6202 condition branch must be nullified. */
6203 if (dbr_sequence_length () == 0
6204 || (nullify && forward_branch_p (insn)))
6208 operands[4] = GEN_INT (length);
6213 operands[4] = GEN_INT (length + 4);
6216 /* Create a reversed conditional branch which branches around
6217 the following insns. */
6218 if (GET_MODE (operands[1]) != DImode)
6224 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6227 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6233 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6236 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6245 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6248 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6254 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6257 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6261 output_asm_insn (buf, operands);
6262 return output_lbranch (operands[0], insn, xdelay);
6267 /* This routine handles output of long unconditional branches that
6268 exceed the maximum range of a simple branch instruction. Since
6269 we don't have a register available for the branch, we save register
6270 %r1 in the frame marker, load the branch destination DEST into %r1,
6271 execute the branch, and restore %r1 in the delay slot of the branch.
6273 Since long branches may have an insn in the delay slot and the
6274 delay slot is used to restore %r1, we in general need to extract
6275 this insn and execute it before the branch. However, to facilitate
6276 use of this function by conditional branches, we also provide an
6277 option to not extract the delay insn so that it will be emitted
6278 after the long branch. So, if there is an insn in the delay slot,
6279 it is extracted if XDELAY is nonzero.
6281 The lengths of the various long-branch sequences are 20, 16 and 24
6282 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6285 output_lbranch (rtx dest, rtx insn, int xdelay)
6289 xoperands[0] = dest;
6291 /* First, free up the delay slot. */
6292 if (xdelay && dbr_sequence_length () != 0)
6294 /* We can't handle a jump in the delay slot. */
6295 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6297 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6300 /* Now delete the delay insn. */
6301 SET_INSN_DELETED (NEXT_INSN (insn));
6304 /* Output an insn to save %r1. The runtime documentation doesn't
6305 specify whether the "Clean Up" slot in the callers frame can
6306 be clobbered by the callee. It isn't copied by HP's builtin
6307 alloca, so this suggests that it can be clobbered if necessary.
6308 The "Static Link" location is copied by HP builtin alloca, so
6309 we avoid using it. Using the cleanup slot might be a problem
6310 if we have to interoperate with languages that pass cleanup
6311 information. However, it should be possible to handle these
6312 situations with GCC's asm feature.
6314 The "Current RP" slot is reserved for the called procedure, so
6315 we try to use it when we don't have a frame of our own. It's
6316 rather unlikely that we won't have a frame when we need to emit
6319 Really the way to go long term is a register scavenger; go to
6320 the target of the jump and find a register which we can use
6321 as a scratch to hold the value in %r1. Then, we wouldn't have
6322 to free up the delay slot or clobber a slot that may be needed
6323 for other purposes. */
6326 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6327 /* Use the return pointer slot in the frame marker. */
6328 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6330 /* Use the slot at -40 in the frame marker since HP builtin
6331 alloca doesn't copy it. */
6332 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6336 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6337 /* Use the return pointer slot in the frame marker. */
6338 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6340 /* Use the "Clean Up" slot in the frame marker. In GCC,
6341 the only other use of this location is for copying a
6342 floating point double argument from a floating-point
6343 register to two general registers. The copy is done
6344 as an "atomic" operation when outputting a call, so it
6345 won't interfere with our using the location here. */
6346 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6349 if (TARGET_PORTABLE_RUNTIME)
6351 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6352 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6353 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6357 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6358 if (TARGET_SOM || !TARGET_GAS)
6360 xoperands[1] = gen_label_rtx ();
6361 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6362 targetm.asm_out.internal_label (asm_out_file, "L",
6363 CODE_LABEL_NUMBER (xoperands[1]));
6364 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6368 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6369 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6371 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6374 /* Now output a very long branch to the original target. */
6375 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6377 /* Now restore the value of %r1 in the delay slot. */
6380 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6381 return "ldd -16(%%r30),%%r1";
6383 return "ldd -40(%%r30),%%r1";
6387 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6388 return "ldw -20(%%r30),%%r1";
6390 return "ldw -12(%%r30),%%r1";
6394 /* This routine handles all the branch-on-bit conditional branch sequences we
6395 might need to generate. It handles nullification of delay slots,
6396 varying length branches, negated branches and all combinations of the
6397 above. It returns the appropriate output template to emit the branch. */
6400 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6402 static char buf[100];
6404 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6405 int length = get_attr_length (insn);
6408 /* A conditional branch to the following instruction (e.g. the delay slot) is
6409 asking for a disaster. I do not think this can happen as this pattern
6410 is only used when optimizing; jump optimization should eliminate the
6411 jump. But be prepared just in case. */
6413 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6416 /* If this is a long branch with its delay slot unfilled, set `nullify'
6417 as it can nullify the delay slot and save a nop. */
6418 if (length == 8 && dbr_sequence_length () == 0)
6421 /* If this is a short forward conditional branch which did not get
6422 its delay slot filled, the delay slot can still be nullified. */
6423 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6424 nullify = forward_branch_p (insn);
6426 /* A forward branch over a single nullified insn can be done with an
6427 extrs instruction. This avoids a single-cycle penalty due to a
6428 mis-predicted branch if we fall through (branch not taken). */
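  /* In other words, a taken branch around one insn is replaced by a
     conditional 1-bit extract into %r0 whose only effect is to nullify
     that insn.  Sketch (completer shown generically as "cond"):

	extrw,s,cond %r26,5,1,%r0	; result discarded; nullifies the
					; next insn when cond is satisfied
	<the single skipped insn>

     so the fall-through path avoids a mispredicted-branch penalty.  */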
6431 && next_real_insn (insn) != 0
6432 && get_attr_length (next_real_insn (insn)) == 4
6433 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6440 /* All short conditional branches except backwards with an unfilled delay slot. */
6444 strcpy (buf, "{extrs,|extrw,s,}");
6446 strcpy (buf, "bb,");
6447 if (useskip && GET_MODE (operands[0]) == DImode)
6448 strcpy (buf, "extrd,s,*");
6449 else if (GET_MODE (operands[0]) == DImode)
6450 strcpy (buf, "bb,*");
6451 if ((which == 0 && negated)
6452 || (which == 1 && ! negated))
6457 strcat (buf, " %0,%1,1,%%r0");
6458 else if (nullify && negated)
6459 strcat (buf, ",n %0,%1,%3");
6460 else if (nullify && ! negated)
6461 strcat (buf, ",n %0,%1,%2");
6462 else if (! nullify && negated)
6463 strcat (buf, "%0,%1,%3");
6464 else if (! nullify && ! negated)
6465 strcat (buf, " %0,%1,%2");
6468 /* All long conditionals. Note a short backward branch with an
6469 unfilled delay slot is treated just like a long backward branch
6470 with an unfilled delay slot. */
6472 /* Handle weird backwards branch with a filled delay slot
6473 which is nullified. */
6474 if (dbr_sequence_length () != 0
6475 && ! forward_branch_p (insn)
6478 strcpy (buf, "bb,");
6479 if (GET_MODE (operands[0]) == DImode)
6481 if ((which == 0 && negated)
6482 || (which == 1 && ! negated))
6487 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6489 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6491 /* Handle short backwards branch with an unfilled delay slot.
6492 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6493 taken and untaken branches. */
6494 else if (dbr_sequence_length () == 0
6495 && ! forward_branch_p (insn)
6496 && INSN_ADDRESSES_SET_P ()
6497 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6498 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6500 strcpy (buf, "bb,");
6501 if (GET_MODE (operands[0]) == DImode)
6503 if ((which == 0 && negated)
6504 || (which == 1 && ! negated))
6509 strcat (buf, " %0,%1,%3%#");
6511 strcat (buf, " %0,%1,%2%#");
6515 if (GET_MODE (operands[0]) == DImode)
6516 strcpy (buf, "extrd,s,*");
6518 strcpy (buf, "{extrs,|extrw,s,}");
6519 if ((which == 0 && negated)
6520 || (which == 1 && ! negated))
6524 if (nullify && negated)
6525 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6526 else if (nullify && ! negated)
6527 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6529 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6531 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6536 /* The reversed conditional branch must branch over one additional
6537 instruction if the delay slot is filled and needs to be extracted
6538 by output_lbranch. If the delay slot is empty or this is a
6539 nullified forward branch, the instruction after the reversed
6540 conditional branch must be nullified. */
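  /* Sketch of this fallback: a branch on the opposite condition hops over
     the long-branch sequence (".+%4" bytes, with %4 computed below), and
     output_lbranch then emits the unconditional long branch to the real
     target.  Illustrative shape only:

	bb,cond,n %r26,5,.+20		; skip the long branch sequence
	... long branch emitted by output_lbranch ...  */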
6541 if (dbr_sequence_length () == 0
6542 || (nullify && forward_branch_p (insn)))
6546 operands[4] = GEN_INT (length);
6551 operands[4] = GEN_INT (length + 4);
6554 if (GET_MODE (operands[0]) == DImode)
6555 strcpy (buf, "bb,*");
6557 strcpy (buf, "bb,");
6558 if ((which == 0 && negated)
6559 || (which == 1 && !negated))
6564 strcat (buf, ",n %0,%1,.+%4");
6566 strcat (buf, " %0,%1,.+%4");
6567 output_asm_insn (buf, operands);
6568 return output_lbranch (negated ? operands[3] : operands[2],
6574 /* This routine handles all the branch-on-variable-bit conditional branch
6575 sequences we might need to generate. It handles nullification of delay
6576 slots, varying-length branches, negated branches, and all combinations
6577 of the above. It returns the appropriate output template to emit the branch. */
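/* Unlike output_bb, the bit position here is not a compile-time constant:
   the PA 1.x bvb instruction tests the bit selected by the shift amount
   register, and the PA 2.0 spelling of the same test is bb with an
   explicit %sar operand, which is why the templates below take the form
   "{bvb,...|bb,...%sar...}".  */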
6581 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6583 static char buf[100];
6585 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6586 int length = get_attr_length (insn);
6589 /* A conditional branch to the following instruction (e.g. the delay slot) is
6590 asking for a disaster. I do not think this can happen as this pattern
6591 is only used when optimizing; jump optimization should eliminate the
6592 jump. But be prepared just in case. */
6594 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6597 /* If this is a long branch with its delay slot unfilled, set `nullify'
6598 as it can nullify the delay slot and save a nop. */
6599 if (length == 8 && dbr_sequence_length () == 0)
6602 /* If this is a short forward conditional branch which did not get
6603 its delay slot filled, the delay slot can still be nullified. */
6604 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6605 nullify = forward_branch_p (insn);
6607 /* A forward branch over a single nullified insn can be done with an
6608 extrs instruction. This avoids a single-cycle penalty due to a
6609 mis-predicted branch if we fall through (branch not taken). */
6612 && next_real_insn (insn) != 0
6613 && get_attr_length (next_real_insn (insn)) == 4
6614 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6621 /* All short conditional branches except backwards with an unfilled delay slot. */
6625 strcpy (buf, "{vextrs,|extrw,s,}");
6627 strcpy (buf, "{bvb,|bb,}");
6628 if (useskip && GET_MODE (operands[0]) == DImode)
6629 strcpy (buf, "extrd,s,*");
6630 else if (GET_MODE (operands[0]) == DImode)
6631 strcpy (buf, "bb,*");
6632 if ((which == 0 && negated)
6633 || (which == 1 && ! negated))
6638 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6639 else if (nullify && negated)
6640 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6641 else if (nullify && ! negated)
6642 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6643 else if (! nullify && negated)
6644 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6645 else if (! nullify && ! negated)
6646 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6649 /* All long conditionals. Note a short backward branch with an
6650 unfilled delay slot is treated just like a long backward branch
6651 with an unfilled delay slot. */
6653 /* Handle weird backwards branch with a filled delay slot
6654 which is nullified. */
6655 if (dbr_sequence_length () != 0
6656 && ! forward_branch_p (insn)
6659 strcpy (buf, "{bvb,|bb,}");
6660 if (GET_MODE (operands[0]) == DImode)
6662 if ((which == 0 && negated)
6663 || (which == 1 && ! negated))
6668 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6670 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6672 /* Handle short backwards branch with an unfilled delay slot.
6673 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6674 taken and untaken branches. */
6675 else if (dbr_sequence_length () == 0
6676 && ! forward_branch_p (insn)
6677 && INSN_ADDRESSES_SET_P ()
6678 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6679 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6681 strcpy (buf, "{bvb,|bb,}");
6682 if (GET_MODE (operands[0]) == DImode)
6684 if ((which == 0 && negated)
6685 || (which == 1 && ! negated))
6690 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6692 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6696 strcpy (buf, "{vextrs,|extrw,s,}");
6697 if (GET_MODE (operands[0]) == DImode)
6698 strcpy (buf, "extrd,s,*");
6699 if ((which == 0 && negated)
6700 || (which == 1 && ! negated))
6704 if (nullify && negated)
6705 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6706 else if (nullify && ! negated)
6707 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6709 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6711 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6716 /* The reversed conditional branch must branch over one additional
6717 instruction if the delay slot is filled and needs to be extracted
6718 by output_lbranch. If the delay slot is empty or this is a
6719 nullified forward branch, the instruction after the reversed
6720 conditional branch must be nullified. */
6721 if (dbr_sequence_length () == 0
6722 || (nullify && forward_branch_p (insn)))
6726 operands[4] = GEN_INT (length);
6731 operands[4] = GEN_INT (length + 4);
6734 if (GET_MODE (operands[0]) == DImode)
6735 strcpy (buf, "bb,*");
6737 strcpy (buf, "{bvb,|bb,}");
6738 if ((which == 0 && negated)
6739 || (which == 1 && !negated))
6744 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6746 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6747 output_asm_insn (buf, operands);
6748 return output_lbranch (negated ? operands[3] : operands[2],
6754 /* Return the output template for emitting a dbra type insn.
6756 Note it may perform some output operations on its own before
6757 returning the final output string. */
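/* The central instruction is addib, which adds a small immediate to a
   register and branches on the result, so a counted loop can step and
   test in one insn.  Purely illustrative:

	addib,<> -1,%r26,L$loop		; %r26 -= 1; loop while nonzero

   The other alternatives below handle the counter living in an FP
   register or in memory, which requires a reload around the branch.  */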
6759 output_dbra (rtx *operands, rtx insn, int which_alternative)
6761 int length = get_attr_length (insn);
6763 /* A conditional branch to the following instruction (e.g. the delay slot) is
6764 asking for a disaster. Be prepared! */
6766 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6768 if (which_alternative == 0)
6769 return "ldo %1(%0),%0";
6770 else if (which_alternative == 1)
6772 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6773 output_asm_insn ("ldw -16(%%r30),%4", operands);
6774 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6775 return "{fldws|fldw} -16(%%r30),%0";
6779 output_asm_insn ("ldw %0,%4", operands);
6780 return "ldo %1(%4),%4\n\tstw %4,%0";
6784 if (which_alternative == 0)
6786 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6789 /* If this is a long branch with its delay slot unfilled, set `nullify'
6790 as it can nullify the delay slot and save a nop. */
6791 if (length == 8 && dbr_sequence_length () == 0)
6794 /* If this is a short forward conditional branch which did not get
6795 its delay slot filled, the delay slot can still be nullified. */
6796 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6797 nullify = forward_branch_p (insn);
6803 return "addib,%C2,n %1,%0,%3";
6805 return "addib,%C2 %1,%0,%3";
6808 /* Handle weird backwards branch with a filled delay slot
6809 which is nullified. */
6810 if (dbr_sequence_length () != 0
6811 && ! forward_branch_p (insn)
6813 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6814 /* Handle short backwards branch with an unfilled delay slot.
6815 Using an addb;nop rather than addi;bl saves 1 cycle for both
6816 taken and untaken branches. */
6817 else if (dbr_sequence_length () == 0
6818 && ! forward_branch_p (insn)
6819 && INSN_ADDRESSES_SET_P ()
6820 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6821 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6822 return "addib,%C2 %1,%0,%3%#";
6824 /* Handle normal cases. */
6826 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6828 return "addi,%N2 %1,%0,%0\n\tb %3";
6831 /* The reversed conditional branch must branch over one additional
6832 instruction if the delay slot is filled and needs to be extracted
6833 by output_lbranch. If the delay slot is empty or this is a
6834 nullified forward branch, the instruction after the reversed
6835 conditional branch must be nullified. */
6836 if (dbr_sequence_length () == 0
6837 || (nullify && forward_branch_p (insn)))
6841 operands[4] = GEN_INT (length);
6846 operands[4] = GEN_INT (length + 4);
6850 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6852 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6854 return output_lbranch (operands[3], insn, xdelay);
6858 /* Deal with gross reload from FP register case. */
6859 else if (which_alternative == 1)
6861 /* Move loop counter from FP register to MEM then into a GR,
6862 increment the GR, store the GR into MEM, and finally reload
6863 the FP register from MEM from within the branch's delay slot. */
6864 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6866 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6868 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6869 else if (length == 28)
6870 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6873 operands[5] = GEN_INT (length - 16);
6874 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6875 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6876 return output_lbranch (operands[3], insn, 0);
6879 /* Deal with gross reload from memory case. */
6882 /* Reload loop counter from memory, the store back to memory
6883 happens in the branch's delay slot. */
6884 output_asm_insn ("ldw %0,%4", operands);
6886 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6887 else if (length == 16)
6888 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6891 operands[5] = GEN_INT (length - 4);
6892 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6893 return output_lbranch (operands[3], insn, 0);
6898 /* Return the output template for emitting a movb type insn.
6900 Note it may perform some output operations on its own before
6901 returning the final output string. */
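/* movb copies a register and branches on the value just copied, folding a
   move and a conditional branch into one insn.  Purely illustrative:

	movb,= %r25,%r26,L$done		; %r26 = %r25; branch if it is zero

   The remaining alternatives below handle FP, memory and %sar
   destinations by spilling through the frame marker.  */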
6903 output_movb (rtx *operands, rtx insn, int which_alternative,
6904 int reverse_comparison)
6906 int length = get_attr_length (insn);
6908 /* A conditional branch to the following instruction (e.g. the delay slot) is
6909 asking for a disaster. Be prepared! */
6911 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6913 if (which_alternative == 0)
6914 return "copy %1,%0";
6915 else if (which_alternative == 1)
6917 output_asm_insn ("stw %1,-16(%%r30)", operands);
6918 return "{fldws|fldw} -16(%%r30),%0";
6920 else if (which_alternative == 2)
6926 /* Support the second variant. */
6927 if (reverse_comparison)
6928 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6930 if (which_alternative == 0)
6932 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6935 /* If this is a long branch with its delay slot unfilled, set `nullify'
6936 as it can nullify the delay slot and save a nop. */
6937 if (length == 8 && dbr_sequence_length () == 0)
6940 /* If this is a short forward conditional branch which did not get
6941 its delay slot filled, the delay slot can still be nullified. */
6942 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6943 nullify = forward_branch_p (insn);
6949 return "movb,%C2,n %1,%0,%3";
6951 return "movb,%C2 %1,%0,%3";
6954 /* Handle weird backwards branch with a filled delay slot
6955 which is nullified. */
6956 if (dbr_sequence_length () != 0
6957 && ! forward_branch_p (insn)
6959 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6961 /* Handle short backwards branch with an unfilled delay slot.
6962 Using a movb;nop rather than or;bl saves 1 cycle for both
6963 taken and untaken branches. */
6964 else if (dbr_sequence_length () == 0
6965 && ! forward_branch_p (insn)
6966 && INSN_ADDRESSES_SET_P ()
6967 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6968 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6969 return "movb,%C2 %1,%0,%3%#";
6970 /* Handle normal cases. */
6972 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6974 return "or,%N2 %1,%%r0,%0\n\tb %3";
6977 /* The reversed conditional branch must branch over one additional
6978 instruction if the delay slot is filled and needs to be extracted
6979 by output_lbranch. If the delay slot is empty or this is a
6980 nullified forward branch, the instruction after the reversed
6981 conditional branch must be nullified. */
6982 if (dbr_sequence_length () == 0
6983 || (nullify && forward_branch_p (insn)))
6987 operands[4] = GEN_INT (length);
6992 operands[4] = GEN_INT (length + 4);
6996 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
6998 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7000 return output_lbranch (operands[3], insn, xdelay);
7003 /* Deal with gross reload for FP destination register case. */
7004 else if (which_alternative == 1)
7006 /* Move source register to MEM, perform the branch test, then
7007 finally load the FP register from MEM from within the branch's
7009 output_asm_insn ("stw %1,-16(%%r30)", operands);
7011 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7012 else if (length == 16)
7013 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7016 operands[4] = GEN_INT (length - 4);
7017 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7018 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7019 return output_lbranch (operands[3], insn, 0);
7022 /* Deal with gross reload from memory case. */
7023 else if (which_alternative == 2)
7025 /* Reload loop counter from memory, the store back to memory
7026 happens in the branch's delay slot. */
7028 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7029 else if (length == 12)
7030 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7033 operands[4] = GEN_INT (length);
7034 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7036 return output_lbranch (operands[3], insn, 0);
7039 /* Handle SAR as a destination. */
7043 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7044 else if (length == 12)
7045 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7048 operands[4] = GEN_INT (length);
7049 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7051 return output_lbranch (operands[3], insn, 0);
7056 /* Copy any FP arguments in INSN into integer registers. */
7058 copy_fp_args (rtx insn)
7063 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7065 int arg_mode, regno;
7066 rtx use = XEXP (link, 0);
7068 if (! (GET_CODE (use) == USE
7069 && GET_CODE (XEXP (use, 0)) == REG
7070 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7073 arg_mode = GET_MODE (XEXP (use, 0));
7074 regno = REGNO (XEXP (use, 0));
7076 /* Is it a floating point register? */
7077 if (regno >= 32 && regno <= 39)
7079 /* Copy the FP register into an integer register via memory. */
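	  /* As the code below shows, FP argument hard regs 32-39 are
	     copied to the general argument registers %r26 down to %r23:
	     an SFmode value in FP reg R goes to general reg
	     26 - (R - 32) / 2, and a DFmode value to the DImode register
	     pair starting at 25 - (R - 34) / 2, in both cases bounced
	     through the scratch slot at -16(%r30).  */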
7080 if (arg_mode == SFmode)
7082 xoperands[0] = XEXP (use, 0);
7083 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7084 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7085 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7089 xoperands[0] = XEXP (use, 0);
7090 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7091 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7092 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7093 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7099 /* Compute length of the FP argument copy sequence for INSN. */
7101 length_fp_args (rtx insn)
7106 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7108 int arg_mode, regno;
7109 rtx use = XEXP (link, 0);
7111 if (! (GET_CODE (use) == USE
7112 && GET_CODE (XEXP (use, 0)) == REG
7113 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7116 arg_mode = GET_MODE (XEXP (use, 0));
7117 regno = REGNO (XEXP (use, 0));
7119 /* Is it a floating point register? */
7120 if (regno >= 32 && regno <= 39)
7122 if (arg_mode == SFmode)
7132 /* Return the attribute length for the millicode call instruction INSN.
7133 The length must match the code generated by output_millicode_call.
7134 We include the delay slot in the returned length as it is better to
7135 overestimate the length than to underestimate it. */
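/* The estimate below is deliberately conservative: when insn addresses
   are available, the accumulated code size is added to the insn's
   reference address and compared against 240000 and 7600000 bytes.
   Those limits are meant to stay safely inside the roughly +/-256KB
   reach of the 17-bit branch and the roughly +/-8MB reach of the PA 2.0
   22-bit branch; the reach figures quoted here are approximate.  */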
7138 attr_length_millicode_call (rtx insn)
7140 unsigned long distance = -1;
7141 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7143 if (INSN_ADDRESSES_SET_P ())
7145 distance = (total + insn_current_reference_address (insn));
7146 if (distance < total)
7152 if (!TARGET_LONG_CALLS && distance < 7600000)
7157 else if (TARGET_PORTABLE_RUNTIME)
7161 if (!TARGET_LONG_CALLS && distance < 240000)
7164 if (TARGET_LONG_ABS_CALL && !flag_pic)
7171 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7174 CALL_DEST is the routine we are calling. */
7177 output_millicode_call (rtx insn, rtx call_dest)
7179 int attr_length = get_attr_length (insn);
7180 int seq_length = dbr_sequence_length ();
7185 xoperands[0] = call_dest;
7186 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7188 /* Handle the common case where we are sure that the branch will
7189 reach the beginning of the $CODE$ subspace. The within reach
7190 form of the $$sh_func_adrs call has a length of 28. Because
7191 it has an attribute type of multi, it never has a nonzero
7192 sequence length. The length of the $$sh_func_adrs is the same
7193 as certain out of reach PIC calls to other routines. */
7194 if (!TARGET_LONG_CALLS
7195 && ((seq_length == 0
7196 && (attr_length == 12
7197 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7198 || (seq_length != 0 && attr_length == 8)))
7200 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7206 /* It might seem that one insn could be saved by accessing
7207 the millicode function using the linkage table. However,
7208 this doesn't work in shared libraries and other dynamically
7209 loaded objects. Using a pc-relative sequence also avoids
7210 problems related to the implicit use of the gp register. */
7211 output_asm_insn ("b,l .+8,%%r1", xoperands);
7215 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7216 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7220 xoperands[1] = gen_label_rtx ();
7221 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7222 targetm.asm_out.internal_label (asm_out_file, "L",
7223 CODE_LABEL_NUMBER (xoperands[1]));
7224 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7227 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7229 else if (TARGET_PORTABLE_RUNTIME)
7231 /* Pure portable runtime doesn't allow be/ble; we also don't
7232 have PIC support in the assembler/linker, so this sequence is needed. */
7235 /* Get the address of our target into %r1. */
7236 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7237 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7239 /* Get our return address into %r31. */
7240 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7241 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7243 /* Jump to our target address in %r1. */
7244 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7248 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7250 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7252 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7256 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7257 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7259 if (TARGET_SOM || !TARGET_GAS)
7261 /* The HP assembler can generate relocations for the
7262 difference of two symbols. GAS can do this for a
7263 millicode symbol but not an arbitrary external
7264 symbol when generating SOM output. */
7265 xoperands[1] = gen_label_rtx ();
7266 targetm.asm_out.internal_label (asm_out_file, "L",
7267 CODE_LABEL_NUMBER (xoperands[1]));
7268 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7269 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7273 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7274 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7278 /* Jump to our target address in %r1. */
7279 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7283 if (seq_length == 0)
7284 output_asm_insn ("nop", xoperands);
7286 /* We are done if there isn't a jump in the delay slot. */
7287 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7290 /* This call has an unconditional jump in its delay slot. */
7291 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7293 /* See if the return address can be adjusted. Use the containing
7294 sequence insn's address. */
7295 if (INSN_ADDRESSES_SET_P ())
7297 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7298 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7299 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7301 if (VAL_14_BITS_P (distance))
7303 xoperands[1] = gen_label_rtx ();
7304 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7305 targetm.asm_out.internal_label (asm_out_file, "L",
7306 CODE_LABEL_NUMBER (xoperands[1]));
7309 /* ??? This branch may not reach its target. */
7310 output_asm_insn ("nop\n\tb,n %0", xoperands);
7313 /* ??? This branch may not reach its target. */
7314 output_asm_insn ("nop\n\tb,n %0", xoperands);
7316 /* Delete the jump. */
7317 SET_INSN_DELETED (NEXT_INSN (insn));
7322 /* Return the attribute length of the call instruction INSN. The SIBCALL
7323 flag indicates whether INSN is a regular call or a sibling call. The
7324 length returned must be longer than the code actually generated by
7325 output_call. Since branch shortening is done before delay branch
7326 sequencing, there is no way to determine whether or not the delay
7327 slot will be filled during branch shortening. Even when the delay
7328 slot is filled, we may have to add a nop if the delay slot contains
7329 a branch that can't reach its target. Thus, we always have to include
7330 the delay slot in the length estimate. This used to be done in
7331 pa_adjust_insn_length but we do it here now as some sequences always
7332 fill the delay slot and we can save four bytes in the estimate for these sequences. */
7336 attr_length_call (rtx insn, int sibcall)
7342 rtx pat = PATTERN (insn);
7343 unsigned long distance = -1;
7345 if (INSN_ADDRESSES_SET_P ())
7347 unsigned long total;
7349 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7350 distance = (total + insn_current_reference_address (insn));
7351 if (distance < total)
7355 /* Determine if this is a local call. */
7356 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7357 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7359 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7361 call_decl = SYMBOL_REF_DECL (call_dest);
7362 local_call = call_decl && targetm.binds_local_p (call_decl);
7364 /* pc-relative branch. */
7365 if (!TARGET_LONG_CALLS
7366 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7367 || distance < 240000))
7370 /* 64-bit plabel sequence. */
7371 else if (TARGET_64BIT && !local_call)
7372 length += sibcall ? 28 : 24;
7374 /* non-pic long absolute branch sequence. */
7375 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7378 /* long pc-relative branch sequence. */
7379 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7380 || (TARGET_64BIT && !TARGET_GAS)
7381 || (TARGET_GAS && !TARGET_SOM
7382 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7386 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7390 /* 32-bit plabel sequence. */
7396 length += length_fp_args (insn);
7406 if (!TARGET_NO_SPACE_REGS)
7414 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7417 CALL_DEST is the routine we are calling. */
7420 output_call (rtx insn, rtx call_dest, int sibcall)
7422 int delay_insn_deleted = 0;
7423 int delay_slot_filled = 0;
7424 int seq_length = dbr_sequence_length ();
7425 tree call_decl = SYMBOL_REF_DECL (call_dest);
7426 int local_call = call_decl && targetm.binds_local_p (call_decl);
7429 xoperands[0] = call_dest;
7431 /* Handle the common case where we're sure that the branch will reach
7432 the beginning of the "$CODE$" subspace. This is the beginning of
7433 the current function if we are in a named section. */
7434 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7436 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7437 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7441 if (TARGET_64BIT && !local_call)
7443 /* ??? As far as I can tell, the HP linker doesn't support the
7444 long pc-relative sequence described in the 64-bit runtime
7445 architecture. So, we use a slightly longer indirect call. */
7446 xoperands[0] = get_deferred_plabel (call_dest);
7447 xoperands[1] = gen_label_rtx ();
7449 /* If this isn't a sibcall, we put the load of %r27 into the
7450 delay slot. We can't do this in a sibcall as we don't
7451 have a second call-clobbered scratch register available. */
7453 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7456 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7459 /* Now delete the delay insn. */
7460 SET_INSN_DELETED (NEXT_INSN (insn));
7461 delay_insn_deleted = 1;
7464 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7465 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7466 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7470 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7471 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7472 output_asm_insn ("bve (%%r1)", xoperands);
7476 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7477 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7478 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7479 delay_slot_filled = 1;
7484 int indirect_call = 0;
7486 /* Emit a long call. There are several different sequences
7487 of increasing length and complexity. In most cases,
7488 they don't allow an instruction in the delay slot. */
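      /* The cases below are, roughly in order of increasing cost: a
	 non-PIC absolute ldil/be pair, a pc-relative sequence using a
	 label difference (HP assembler/SOM), a pc-relative $PIC_pcrel$0
	 sequence (GAS), and finally the indirect plabel-based sequence
	 that inlines the work normally done by $$dyncall.  */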
7489 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7490 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7491 && !(TARGET_GAS && !TARGET_SOM
7492 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7497 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7499 && (!TARGET_PA_20 || indirect_call))
7501 /* A non-jump insn in the delay slot. By definition we can
7502 emit this insn before the call (and in fact before argument relocation). */
7504 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7507 /* Now delete the delay insn. */
7508 SET_INSN_DELETED (NEXT_INSN (insn));
7509 delay_insn_deleted = 1;
7512 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7514 /* This is the best sequence for making long calls in
7515 non-pic code. Unfortunately, GNU ld doesn't provide
7516 the stub needed for external calls, and GAS's support
7517 for this with the SOM linker is buggy. It is safe
7518 to use this for local calls. */
7519 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7521 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7525 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7528 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7530 output_asm_insn ("copy %%r31,%%r2", xoperands);
7531 delay_slot_filled = 1;
7536 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7537 || (TARGET_64BIT && !TARGET_GAS))
7539 /* The HP assembler and linker can handle relocations
7540 for the difference of two symbols. GAS and the HP
7541 linker can't do this when one of the symbols is external. */
7543 xoperands[1] = gen_label_rtx ();
7544 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7545 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7546 targetm.asm_out.internal_label (asm_out_file, "L",
7547 CODE_LABEL_NUMBER (xoperands[1]));
7548 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7550 else if (TARGET_GAS && !TARGET_SOM
7551 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7553 /* GAS currently can't generate the relocations that
7554 are needed for the SOM linker under HP-UX using this
7555 sequence. The GNU linker doesn't generate the stubs
7556 that are needed for external calls on TARGET_ELF32
7557 with this sequence. For now, we have to use a
7558 longer plabel sequence when using GAS. */
7559 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7560 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7562 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7567 /* Emit a long plabel-based call sequence. This is
7568 essentially an inline implementation of $$dyncall.
7569 We don't actually try to call $$dyncall as this is
7570 as difficult as calling the function itself. */
7571 xoperands[0] = get_deferred_plabel (call_dest);
7572 xoperands[1] = gen_label_rtx ();
7574 /* Since the call is indirect, FP arguments in registers
7575 need to be copied to the general registers. Then, the
7576 argument relocation stub will copy them back. */
7578 copy_fp_args (insn);
7582 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7583 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7584 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7588 output_asm_insn ("addil LR'%0-$global$,%%r27",
7590 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7594 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7595 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7596 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7597 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
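	  /* To recap the sequence just emitted: %r1 now holds the value
	     from the plabel.  The bb,>= test skips the next three insns
	     when bit 30 (the 2 bit in PA's MSB-first numbering) is clear,
	     i.e. when %r1 is already a plain code address.  Otherwise the
	     value points at a descriptor: depi clears the two low bits,
	     the new global pointer is loaded into %r19 from offset 4, and
	     the real code address from offset 0.  */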
7599 if (!sibcall && !TARGET_PA_20)
7601 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7602 if (TARGET_NO_SPACE_REGS)
7603 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7605 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7612 output_asm_insn ("bve (%%r1)", xoperands);
7617 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7618 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7619 delay_slot_filled = 1;
7622 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7627 if (!TARGET_NO_SPACE_REGS)
7628 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7633 if (TARGET_NO_SPACE_REGS)
7634 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7636 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7640 if (TARGET_NO_SPACE_REGS)
7641 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7643 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7646 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7648 output_asm_insn ("copy %%r31,%%r2", xoperands);
7649 delay_slot_filled = 1;
7656 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7657 output_asm_insn ("nop", xoperands);
7659 /* We are done if there isn't a jump in the delay slot. */
7661 || delay_insn_deleted
7662 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7665 /* A sibcall should never have a branch in the delay slot. */
7666 gcc_assert (!sibcall);
7668 /* This call has an unconditional jump in its delay slot. */
7669 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7671 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7673 /* See if the return address can be adjusted. Use the containing
7674 sequence insn's address. */
7675 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7676 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7677 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7679 if (VAL_14_BITS_P (distance))
7681 xoperands[1] = gen_label_rtx ();
7682 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7683 targetm.asm_out.internal_label (asm_out_file, "L",
7684 CODE_LABEL_NUMBER (xoperands[1]));
7687 output_asm_insn ("nop\n\tb,n %0", xoperands);
7690 output_asm_insn ("b,n %0", xoperands);
7692 /* Delete the jump. */
7693 SET_INSN_DELETED (NEXT_INSN (insn));
7698 /* Return the attribute length of the indirect call instruction INSN.
7699 The length must match the code generated by output_indirect_call.
7700 The returned length includes the delay slot. Currently, the delay
7701 slot of an indirect call sequence is not exposed and it is used by
7702 the sequence itself. */
7705 attr_length_indirect_call (rtx insn)
7707 unsigned long distance = -1;
7708 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7710 if (INSN_ADDRESSES_SET_P ())
7712 distance = (total + insn_current_reference_address (insn));
7713 if (distance < total)
7720 if (TARGET_FAST_INDIRECT_CALLS
7721 || (!TARGET_PORTABLE_RUNTIME
7722 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7723 || distance < 240000)))
7729 if (TARGET_PORTABLE_RUNTIME)
7732 /* Out of reach, can use ble. */
7737 output_indirect_call (rtx insn, rtx call_dest)
7743 xoperands[0] = call_dest;
7744 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7745 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7749 /* First the special case for kernels, level 0 systems, etc. */
7750 if (TARGET_FAST_INDIRECT_CALLS)
7751 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7753 /* Now the normal case -- we can reach $$dyncall directly or
7754 we're sure that we can get there via a long-branch stub.
7756 No need to check target flags as the length uniquely identifies
7757 the remaining cases. */
7758 if (attr_length_indirect_call (insn) == 8)
7760 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7761 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7762 variant of the B,L instruction can't be used on the SOM target. */
7763 if (TARGET_PA_20 && !TARGET_SOM)
7764 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7766 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7769 /* Long millicode call, but we are not generating PIC or portable runtime code. */
7771 if (attr_length_indirect_call (insn) == 12)
7772 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7774 /* Long millicode call for portable runtime. */
7775 if (attr_length_indirect_call (insn) == 20)
7776 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7778 /* We need a long PIC call to $$dyncall. */
7779 xoperands[0] = NULL_RTX;
7780 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7781 if (TARGET_SOM || !TARGET_GAS)
7783 xoperands[0] = gen_label_rtx ();
7784 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7785 targetm.asm_out.internal_label (asm_out_file, "L",
7786 CODE_LABEL_NUMBER (xoperands[0]));
7787 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7791 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7792 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7795 output_asm_insn ("blr %%r0,%%r2", xoperands);
7796 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7800 /* Return the total length of the save and restore instructions needed for
7801 the data linkage table pointer (i.e., the PIC register) across the call
7802 instruction INSN. No-return calls do not require a save and restore.
7803 In addition, we may be able to avoid the save and restore for calls
7804 within the same translation unit. */
7807 attr_length_save_restore_dltp (rtx insn)
7809 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7815 /* In HP-UX 8.0's shared library scheme, special relocations are needed
7816 for function labels if they might be passed to a function
7817 in a shared library (because shared libraries don't live in code
7818 space), and special magic is needed to construct their address. */
7821 hppa_encode_label (rtx sym)
7823 const char *str = XSTR (sym, 0);
7824 int len = strlen (str) + 1;
7827 p = newstr = alloca (len + 1);
7831 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
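/* The visible effect is that a function label such as "foo" becomes
   "@foo"; pa_strip_name_encoding below removes that leading marker
   (along with any assembler '*' prefix) when the plain name is needed
   again.  */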
7835 pa_encode_section_info (tree decl, rtx rtl, int first)
7837 default_encode_section_info (decl, rtl, first);
7839 if (first && TEXT_SPACE_P (decl))
7841 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7842 if (TREE_CODE (decl) == FUNCTION_DECL)
7843 hppa_encode_label (XEXP (rtl, 0));
7847 /* This is sort of inverse to pa_encode_section_info. */
7850 pa_strip_name_encoding (const char *str)
7852 str += (*str == '@');
7853 str += (*str == '*');
7858 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7860 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7863 /* Returns 1 if OP is a function label involved in a simple addition
7864 with a constant. Used to keep certain patterns from matching
7865 during instruction combination. */
7867 is_function_label_plus_const (rtx op)
7869 /* Strip off any CONST. */
7870 if (GET_CODE (op) == CONST)
7873 return (GET_CODE (op) == PLUS
7874 && function_label_operand (XEXP (op, 0), Pmode)
7875 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7878 /* Output assembly code for a thunk to FUNCTION. */
7881 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7882 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7885 static unsigned int current_thunk_number;
7886 int val_14 = VAL_14_BITS_P (delta);
7891 xoperands[0] = XEXP (DECL_RTL (function), 0);
7892 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7893 xoperands[2] = GEN_INT (delta);
7895 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7896 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7898 /* Output the thunk. We know that the function is in the same
7899 translation unit (i.e., the same space) as the thunk, and that
7900 thunks are output after their method. Thus, we don't need an
7901 external branch to reach the function. With SOM and GAS,
7902 functions and thunks are effectively in different sections.
7903 Thus, we can always use an IA-relative branch and the linker
7904 will add a long branch stub if necessary.
7906 However, we have to be careful when generating PIC code on the
7907 SOM port to ensure that the sequence does not transfer to an
7908 import stub for the target function as this could clobber the
7909 return value saved at SP-24. This would also apply to the
7910 32-bit linux port if the multi-space model is implemented. */
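  /* In the simplest case below the whole thunk is just an adjustment of
     the incoming `this' pointer in %r26 followed by a direct branch, with
     the low part of the adjustment done in the delay slot, roughly:

	addil L'delta,%r26		; %r1 = %r26 + left part of delta
	b function
	ldo R'delta(%r1),%r26		; delay slot: finish the adjustment

     (or just "b function" with "ldo delta(%r26),%r26" in the delay slot
     when delta fits in 14 bits).  The later cases handle PIC, the 64-bit
     port, and the portable runtime.  */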
7911 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7912 && !(flag_pic && TREE_PUBLIC (function))
7913 && (TARGET_GAS || last_address < 262132))
7914 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7915 && ((targetm.have_named_sections
7916 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7917 /* The GNU 64-bit linker has rather poor stub management.
7918 So, we use a long branch from thunks that aren't in
7919 the same section as the target function. */
7921 && (DECL_SECTION_NAME (thunk_fndecl)
7922 != DECL_SECTION_NAME (function)))
7923 || ((DECL_SECTION_NAME (thunk_fndecl)
7924 == DECL_SECTION_NAME (function))
7925 && last_address < 262132)))
7926 || (!targetm.have_named_sections && last_address < 262132))))
7929 output_asm_insn ("addil L'%2,%%r26", xoperands);
7931 output_asm_insn ("b %0", xoperands);
7935 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7940 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7944 else if (TARGET_64BIT)
7946 /* We only have one call-clobbered scratch register, so we can't
7947 make use of the delay slot if delta doesn't fit in 14 bits. */
7950 output_asm_insn ("addil L'%2,%%r26", xoperands);
7951 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7954 output_asm_insn ("b,l .+8,%%r1", xoperands);
7958 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7959 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7963 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7964 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7969 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7970 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7975 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7979 else if (TARGET_PORTABLE_RUNTIME)
7981 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7982 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7985 output_asm_insn ("addil L'%2,%%r26", xoperands);
7987 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7991 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7996 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8000 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8002 /* The function is accessible from outside this module. The only
8003 way to avoid an import stub between the thunk and function is to
8004 call the function directly with an indirect sequence similar to
8005 that used by $$dyncall. This is possible because $$dyncall acts
8006 as the import stub in an indirect call. */
8007 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8008 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8009 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8010 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8011 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8012 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8013 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8014 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8015 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8019 output_asm_insn ("addil L'%2,%%r26", xoperands);
8025 output_asm_insn ("bve (%%r22)", xoperands);
8028 else if (TARGET_NO_SPACE_REGS)
8030 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8035 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8036 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8037 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8042 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8044 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8048 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8050 if (TARGET_SOM || !TARGET_GAS)
8052 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8053 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8057 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8058 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8062 output_asm_insn ("addil L'%2,%%r26", xoperands);
8064 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8068 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8073 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8080 output_asm_insn ("addil L'%2,%%r26", xoperands);
8082 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8083 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8087 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8092 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8097 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8099 if (TARGET_SOM && TARGET_GAS)
8101 /* We're done with this subspace except possibly for some additional
8102 debug information. Forget that we are in this subspace to ensure
8103 that the next function is output in its own subspace. */
8105 cfun->machine->in_nsubspa = 2;
8108 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8110 switch_to_section (data_section);
8111 output_asm_insn (".align 4", xoperands);
8112 ASM_OUTPUT_LABEL (file, label);
8113 output_asm_insn (".word P'%0", xoperands);
8116 current_thunk_number++;
8117 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8118 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8119 last_address += nbytes;
8120 update_total_code_bytes (nbytes);
8123 /* Only direct calls to static functions are allowed to be sibling (tail) call optimized.
8126 This restriction is necessary because some linker generated stubs will
8127 store return pointers into rp' in some cases which might clobber a
8128 live value already in rp'.
8130 In a sibcall the current function and the target function share stack
8131 space. Thus if the path to the current function and the path to the
8132 target function save a value in rp', they save the value into the
8133 same stack slot, which has undesirable consequences.
8135 Because of the deferred binding nature of shared libraries any function
8136 with external scope could be in a different load module and thus require
8137 rp' to be saved when calling that function. So sibcall optimizations
8138 can only be safe for static functions.
8140 Note that GCC never needs return value relocations, so we don't have to
8141 worry about static calls with return value relocations (which require rp' to be saved).
8144 It is safe to perform a sibcall optimization when the target function
8145 will never return. */
8147 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8149 if (TARGET_PORTABLE_RUNTIME)
8152 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8153 single subspace mode and the call is not indirect. As far as I know,
8154 there is no operating system support for the multiple subspace mode.
8155 It might be possible to support indirect calls if we didn't use
8156 $$dyncall (see the indirect sequence generated in output_call). */
8158 return (decl != NULL_TREE);
8160 /* Sibcalls are not ok because the arg pointer register is not a fixed
8161 register. This prevents the sibcall optimization from occurring. In
8162 addition, there are problems with stub placement using GNU ld. This
8163 is because a normal sibcall branch uses a 17-bit relocation while
8164 a regular call branch uses a 22-bit relocation. As a result, more
8165 care needs to be taken in the placement of long-branch stubs. */
8169 /* Sibcalls are only ok within a translation unit. */
8170 return (decl && !TREE_PUBLIC (decl));
8173 /* ??? Addition is not commutative on the PA due to the weird implicit
8174 space register selection rules for memory addresses. Therefore, we
8175 don't consider a + b == b + a, as this might be inside a MEM. */
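/* In rough terms, short-displacement memory references pick their space
   register from the high-order bits of the base register, so which
   operand of a PLUS ends up as the base can change which space the
   access uses.  That is the implicit selection rule referred to above.  */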
8177 pa_commutative_p (const_rtx x, int outer_code)
8179 return (COMMUTATIVE_P (x)
8180 && (TARGET_NO_SPACE_REGS
8181 || (outer_code != UNKNOWN && outer_code != MEM)
8182 || GET_CODE (x) != PLUS));
8185 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8186 use in fmpyadd instructions. */
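/* Operand layout assumed by the checks below: operands[0..2] are the
   destination and two sources of the multiply, and operands[3..5] are
   the destination and two sources of the addition, with operands[3]
   doubling as the add's input/output register.  */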
8188 fmpyaddoperands (rtx *operands)
8190 enum machine_mode mode = GET_MODE (operands[0]);
8192 /* Must be a floating point mode. */
8193 if (mode != SFmode && mode != DFmode)
8196 /* All modes must be the same. */
8197 if (! (mode == GET_MODE (operands[1])
8198 && mode == GET_MODE (operands[2])
8199 && mode == GET_MODE (operands[3])
8200 && mode == GET_MODE (operands[4])
8201 && mode == GET_MODE (operands[5])))
8204 /* All operands must be registers. */
8205 if (! (GET_CODE (operands[1]) == REG
8206 && GET_CODE (operands[2]) == REG
8207 && GET_CODE (operands[3]) == REG
8208 && GET_CODE (operands[4]) == REG
8209 && GET_CODE (operands[5]) == REG))
8212 /* Only 2 real operands to the addition. One of the input operands must
8213 be the same as the output operand. */
8214 if (! rtx_equal_p (operands[3], operands[4])
8215 && ! rtx_equal_p (operands[3], operands[5]))
8218 /* Inout operand of add cannot conflict with any operands from multiply. */
8219 if (rtx_equal_p (operands[3], operands[0])
8220 || rtx_equal_p (operands[3], operands[1])
8221 || rtx_equal_p (operands[3], operands[2]))
8224 /* multiply cannot feed into addition operands. */
8225 if (rtx_equal_p (operands[4], operands[0])
8226 || rtx_equal_p (operands[5], operands[0]))
8229 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8231 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8232 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8233 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8234 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8235 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8236 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8239 /* Passed. Operands are suitable for fmpyadd. */
8243 #if !defined(USE_COLLECT2)
8245 pa_asm_out_constructor (rtx symbol, int priority)
8247 if (!function_label_operand (symbol, VOIDmode))
8248 hppa_encode_label (symbol);
8250 #ifdef CTORS_SECTION_ASM_OP
8251 default_ctor_section_asm_out_constructor (symbol, priority);
8253 # ifdef TARGET_ASM_NAMED_SECTION
8254 default_named_section_asm_out_constructor (symbol, priority);
8256 default_stabs_asm_out_constructor (symbol, priority);
8262 pa_asm_out_destructor (rtx symbol, int priority)
8264 if (!function_label_operand (symbol, VOIDmode))
8265 hppa_encode_label (symbol);
8267 #ifdef DTORS_SECTION_ASM_OP
8268 default_dtor_section_asm_out_destructor (symbol, priority);
8270 # ifdef TARGET_ASM_NAMED_SECTION
8271 default_named_section_asm_out_destructor (symbol, priority);
8273 default_stabs_asm_out_destructor (symbol, priority);
8279 /* This function places uninitialized global data in the bss section.
8280 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8281 function on the SOM port to prevent uninitialized global data from
8282 being placed in the data section. */
8285 pa_asm_output_aligned_bss (FILE *stream,
8287 unsigned HOST_WIDE_INT size,
8290 switch_to_section (bss_section);
8291 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8293 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8294 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8297 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8298 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8301 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8302 ASM_OUTPUT_LABEL (stream, name);
8303 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8306 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8307 that doesn't allow the alignment of global common storage to be directly
8308 specified. The SOM linker aligns common storage based on the rounded
8309 value of the NUM_BYTES parameter in the .comm directive. It's not
8310 possible to use the .align directive as it doesn't affect the alignment
8311 of the label associated with a .comm directive. */
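/* So, for example, an 8-byte object requesting 8-byte alignment comes out
   below as just

	sym	.comm 8

   and the SOM linker derives the alignment from that rounded size rather
   than from a .align directive.  The example is illustrative only.  */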
8314 pa_asm_output_aligned_common (FILE *stream,
8316 unsigned HOST_WIDE_INT size,
8319 unsigned int max_common_align;
8321 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8322 if (align > max_common_align)
8324 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8325 "for global common data. Using %u",
8326 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8327 align = max_common_align;
8330 switch_to_section (bss_section);
8332 assemble_name (stream, name);
8333 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8334 MAX (size, align / BITS_PER_UNIT));
8337 /* We can't use .comm for local common storage as the SOM linker effectively
8338 treats the symbol as universal and uses the same storage for local symbols
8339 with the same name in different object files. The .block directive
8340 reserves an uninitialized block of storage. However, it's not common
8341 storage. Fortunately, GCC never requests common storage with the same
8342 name in any given translation unit. */
8345 pa_asm_output_aligned_local (FILE *stream,
8347 unsigned HOST_WIDE_INT size,
8350 switch_to_section (bss_section);
8351 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8354 fprintf (stream, "%s", LOCAL_ASM_OP);
8355 assemble_name (stream, name);
8356 fprintf (stream, "\n");
8359 ASM_OUTPUT_LABEL (stream, name);
8360 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8363 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8364 use in fmpysub instructions. */
8366 fmpysuboperands (rtx *operands)
8368 enum machine_mode mode = GET_MODE (operands[0]);
8370 /* Must be a floating point mode. */
8371 if (mode != SFmode && mode != DFmode)
8374 /* All modes must be the same. */
8375 if (! (mode == GET_MODE (operands[1])
8376 && mode == GET_MODE (operands[2])
8377 && mode == GET_MODE (operands[3])
8378 && mode == GET_MODE (operands[4])
8379 && mode == GET_MODE (operands[5])))
8382 /* All operands must be registers. */
8383 if (! (GET_CODE (operands[1]) == REG
8384 && GET_CODE (operands[2]) == REG
8385 && GET_CODE (operands[3]) == REG
8386 && GET_CODE (operands[4]) == REG
8387 && GET_CODE (operands[5]) == REG))
8390 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8391 operation, so operands[4] must be the same as operands[3]. */
8392 if (! rtx_equal_p (operands[3], operands[4]))
8395 /* multiply cannot feed into subtraction. */
8396 if (rtx_equal_p (operands[5], operands[0]))
8399 /* Inout operand of sub cannot conflict with any operands from multiply. */
8400 if (rtx_equal_p (operands[3], operands[0])
8401 || rtx_equal_p (operands[3], operands[1])
8402 || rtx_equal_p (operands[3], operands[2]))
8405 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8407 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8408 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8409 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8410 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8411 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8412 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8415 /* Passed. Operands are suitable for fmpysub. */
8419 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8420 constants for shadd instructions. */
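/* These are the scale factors accepted by the sh1add/sh2add/sh3add
   instructions, which shift one operand left by 1, 2 or 3 bits before
   adding.  Purely illustrative:

	sh2add %r26,%r25,%r28		; %r28 = 4 * %r26 + %r25  */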
8422 shadd_constant_p (int val)
8424 if (val == 2 || val == 4 || val == 8)
8430 /* Return 1 if OP is valid as a base or index register in a REG+REG address. */
8434 borx_reg_operand (rtx op, enum machine_mode mode)
8436 if (GET_CODE (op) != REG)
8439 /* We must reject virtual registers as the only expressions that
8440 can be instantiated are REG and REG+CONST. */
8441 if (op == virtual_incoming_args_rtx
8442 || op == virtual_stack_vars_rtx
8443 || op == virtual_stack_dynamic_rtx
8444 || op == virtual_outgoing_args_rtx
8445 || op == virtual_cfa_rtx)
8448 /* While it's always safe to index off the frame pointer, it's not
8449 profitable to do so when the frame pointer is being eliminated. */
8450 if (!reload_completed
8451 && flag_omit_frame_pointer
8452 && !current_function_calls_alloca
8453 && op == frame_pointer_rtx)
8456 return register_operand (op, mode);
8459 /* Return 1 if this operand is anything other than a hard register. */
8462 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8464 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8467 /* Return 1 if INSN branches forward. Should be using insn_addresses
8468 to avoid walking through all the insns... */
8470 forward_branch_p (rtx insn)
8472 rtx label = JUMP_LABEL (insn);
8479 insn = NEXT_INSN (insn);
8482 return (insn == label);
8485 /* Return 1 if OP is an equality comparison, else return 0. */
8487 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8489 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8492 /* Return 1 if INSN is in the delay slot of a call instruction. */
8494 jump_in_call_delay (rtx insn)
8497 if (GET_CODE (insn) != JUMP_INSN)
8500 if (PREV_INSN (insn)
8501 && PREV_INSN (PREV_INSN (insn))
8502 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8504 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8506 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8507 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8514 /* Output an unconditional move and branch insn. */
8517 output_parallel_movb (rtx *operands, rtx insn)
8519 int length = get_attr_length (insn);
8521 /* These are the cases in which we win. */
8523 return "mov%I1b,tr %1,%0,%2";
8525 /* None of the following cases win, but they don't lose either. */
8528 if (dbr_sequence_length () == 0)
8530 /* Nothing in the delay slot, fake it by putting the combined
8531 insn (the copy or add) in the delay slot of a bl. */
8532 if (GET_CODE (operands[1]) == CONST_INT)
8533 return "b %2\n\tldi %1,%0";
8535 return "b %2\n\tcopy %1,%0";
8539 /* Something in the delay slot, but we've got a long branch. */
8540 if (GET_CODE (operands[1]) == CONST_INT)
8541 return "ldi %1,%0\n\tb %2";
8543 return "copy %1,%0\n\tb %2";
8547 if (GET_CODE (operands[1]) == CONST_INT)
8548 output_asm_insn ("ldi %1,%0", operands);
8550 output_asm_insn ("copy %1,%0", operands);
8551 return output_lbranch (operands[2], insn, 1);
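/* For illustration (operands and label invented): with register operands
   the short case above comes out roughly as

       movb,tr %r26,%r28,L$0002

   i.e. the copy rides along with the branch in one insn, while the longer
   cases fall back to a separate "copy"/"ldi" plus a plain "b", exactly as
   the templates spell out.  */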
8554 /* Output an unconditional add and branch insn. */
8557 output_parallel_addb (rtx *operands, rtx insn)
8559 int length = get_attr_length (insn);
8561 /* To make life easy we want operand0 to be the shared input/output
8562 operand and operand1 to be the readonly operand. */
8563 if (operands[0] == operands[1])
8564 operands[1] = operands[2];
8566 /* These are the cases in which we win. */
8568 return "add%I1b,tr %1,%0,%3";
8570 /* None of the following cases win, but they don't lose either. */
8573 if (dbr_sequence_length () == 0)
8574 /* Nothing in the delay slot, fake it by putting the combined
8575 insn (the copy or add) in the delay slot of a bl. */
8576 return "b %3\n\tadd%I1 %1,%0,%0";
8578 /* Something in the delay slot, but we've got a long branch. */
8579 return "add%I1 %1,%0,%0\n\tb %3";
8582 output_asm_insn ("add%I1 %1,%0,%0", operands);
8583 return output_lbranch (operands[3], insn, 1);
8586 /* Return nonzero if INSN (a jump insn) immediately follows a call
8587 to a named function. This is used to avoid filling the delay slot
8588 of the jump since it can usually be eliminated by modifying RP in
8589 the delay slot of the call. */
8592 following_call (rtx insn)
8594 if (! TARGET_JUMP_IN_DELAY)
8597 /* Find the previous real insn, skipping NOTEs. */
8598 insn = PREV_INSN (insn);
8599 while (insn && GET_CODE (insn) == NOTE)
8600 insn = PREV_INSN (insn);
8602 /* Check for CALL_INSNs and millicode calls. */
8604 && ((GET_CODE (insn) == CALL_INSN
8605 && get_attr_type (insn) != TYPE_DYNCALL)
8606 || (GET_CODE (insn) == INSN
8607 && GET_CODE (PATTERN (insn)) != SEQUENCE
8608 && GET_CODE (PATTERN (insn)) != USE
8609 && GET_CODE (PATTERN (insn)) != CLOBBER
8610 && get_attr_type (insn) == TYPE_MILLI)))
8616 /* We use this hook to perform a PA specific optimization which is difficult
8617 to do in earlier passes.
8619 We want the delay slots of branches within jump tables to be filled.
8620 None of the compiler passes at the moment even has the notion that a
8621 PA jump table doesn't contain addresses, but instead contains actual instructions!
8624 Because we actually jump into the table, the addresses of each entry
8625 must stay constant in relation to the beginning of the table (which
8626 itself must stay constant relative to the instruction to jump into
8627 it). I don't believe we can guarantee earlier passes of the compiler
8628 will adhere to those rules.
8630 So, late in the compilation process we find all the jump tables, and
8631 expand them into real code -- e.g. each entry in the jump table vector
8632 will get an appropriate label followed by a jump to the final target.
8634 Reorg and the final jump pass can then optimize these branches and
8635 fill their delay slots. We end up with smaller, more efficient code.
8637 The jump instructions within the table are special; we must be able
8638 to identify them during assembly output (if the jumps don't get filled
8639 we need to emit a nop rather than nullifying the delay slot). We
8640 identify jumps in switch tables by using insns with the attribute
8641 type TYPE_BTABLE_BRANCH.
8643 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8644 insns. This serves two purposes: first, it prevents jump.c from
8645 noticing that the last N entries in the table jump to the instruction
8646 immediately after the table and deleting the jumps. Second, those
8647 insns mark where we should emit .begin_brtab and .end_brtab directives
8648 when using GAS (allows for better link time optimizations). */
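/* As a rough sketch (label names invented), the loop below rewrites a table

       (addr_vec [L1 L2 L3])

   into a run of ordinary insns, one per entry,

       Lnew1:  jump to L1   (followed by a barrier)
       Lnew2:  jump to L2   (followed by a barrier)
       Lnew3:  jump to L3   (followed by a barrier)

   bracketed by begin_brtab/end_brtab markers, so that later passes can
   fill the delay slots of those jumps like any other branch.  */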
8655 remove_useless_addtr_insns (1);
8657 if (pa_cpu < PROCESSOR_8000)
8658 pa_combine_instructions ();
8661 /* This is fairly cheap, so always run it if optimizing. */
8662 if (optimize > 0 && !TARGET_BIG_SWITCH)
8664 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8665 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8667 rtx pattern, tmp, location, label;
8668 unsigned int length, i;
8670 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8671 if (GET_CODE (insn) != JUMP_INSN
8672 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8673 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8676 /* Emit marker for the beginning of the branch table. */
8677 emit_insn_before (gen_begin_brtab (), insn);
8679 pattern = PATTERN (insn);
8680 location = PREV_INSN (insn);
8681 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8683 for (i = 0; i < length; i++)
8685 /* Emit a label before each jump to keep jump.c from
8686 removing this code. */
8687 tmp = gen_label_rtx ();
8688 LABEL_NUSES (tmp) = 1;
8689 emit_label_after (tmp, location);
8690 location = NEXT_INSN (location);
8692 if (GET_CODE (pattern) == ADDR_VEC)
8693 label = XEXP (XVECEXP (pattern, 0, i), 0);
8695 label = XEXP (XVECEXP (pattern, 1, i), 0);
8697 tmp = gen_short_jump (label);
8699 /* Emit the jump itself. */
8700 tmp = emit_jump_insn_after (tmp, location);
8701 JUMP_LABEL (tmp) = label;
8702 LABEL_NUSES (label)++;
8703 location = NEXT_INSN (location);
8705 /* Emit a BARRIER after the jump. */
8706 emit_barrier_after (location);
8707 location = NEXT_INSN (location);
8710 /* Emit marker for the end of the branch table. */
8711 emit_insn_before (gen_end_brtab (), location);
8712 location = NEXT_INSN (location);
8713 emit_barrier_after (location);
8715 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8721 /* Still need brtab marker insns. FIXME: the presence of these
8722 markers disables output of the branch table to readonly memory,
8723 and any alignment directives that might be needed. Possibly,
8724 the begin_brtab insn should be output before the label for the
8725 table. This doesn't matter at the moment since the tables are
8726 always output in the text section. */
8727 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8729 /* Find an ADDR_VEC insn. */
8730 if (GET_CODE (insn) != JUMP_INSN
8731 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8732 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8735 /* Now generate markers for the beginning and end of the branch table. */
8737 emit_insn_before (gen_begin_brtab (), insn);
8738 emit_insn_after (gen_end_brtab (), insn);
8743 /* The PA has a number of odd instructions which can perform multiple
8744 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8745 it may be profitable to combine two instructions into one instruction
8746 with two outputs. It's not profitable on PA2.0 machines because the
8747 two outputs would take two slots in the reorder buffers.
8749 This routine finds instructions which can be combined and combines
8750 them. We only support some of the potential combinations, and we
8751 only try common ways to find suitable instructions.
8753 * addb can add two registers or a register and a small integer
8754 and jump to a nearby (+-8k) location. Normally the jump to the
8755 nearby location is conditional on the result of the add, but by
8756 using the "true" condition we can make the jump unconditional.
8757 Thus addb can perform two independent operations in one insn.
8759 * movb is similar to addb in that it can perform a reg->reg
8760 or small immediate->reg copy and jump to a nearby (+-8k) location.
8762 * fmpyadd and fmpysub can perform a FP multiply and either an
8763 FP add or FP sub if the operands of the multiply and add/sub are
8764 independent (there are other minor restrictions). Note both
8765 the fmpy and fadd/fsub can in theory move to better spots according
8766 to data dependencies, but for now we require the fmpy stay at a fixed location.
8769 * Many of the memory operations can perform pre & post updates
8770 of index registers. GCC's pre/post increment/decrement addressing
8771 is far too simple to take advantage of all the possibilities. This
8772 pass may not be suitable since those insns may not be independent.
8774 * comclr can compare two ints or an int and a register, nullify
8775 the following instruction and zero some other register. This
8776 is more difficult to use as it's harder to find an insn which
8777 will generate a comclr than finding something like an unconditional
8778 branch (conditional moves & long branches create comclr insns).
8780 * Most arithmetic operations can conditionally skip the next
8781 instruction. They can be viewed as "perform this operation
8782 and conditionally jump to this nearby location" (where nearby
8783 is one insn away). These are difficult to use due to the
8784 branch length restrictions. */
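/* A small example of the kind of combination attempted below (registers
   and label are illustrative only): an independent copy followed by an
   unconditional backward branch,

       copy %r5,%r3
       b L$0010

   can be expressed as one "movb" using the always-true condition,

       movb,tr %r5,%r3,L$0010

   which is precisely the PARALLEL of a move and a branch that this pass
   tries to build and recognize.  */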
8787 pa_combine_instructions (void)
8791 /* This can get expensive since the basic algorithm is on the
8792 order of O(n^2) (or worse). Only do it for -O2 or higher
8793 levels of optimization. */
8797 /* Walk down the list of insns looking for "anchor" insns which
8798 may be combined with "floating" insns. As the name implies,
8799 "anchor" instructions don't move, while "floating" insns may
8801 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8802 new = make_insn_raw (new);
8804 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8806 enum attr_pa_combine_type anchor_attr;
8807 enum attr_pa_combine_type floater_attr;
8809 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8810 Also ignore any special USE insns. */
8811 if ((GET_CODE (anchor) != INSN
8812 && GET_CODE (anchor) != JUMP_INSN
8813 && GET_CODE (anchor) != CALL_INSN)
8814 || GET_CODE (PATTERN (anchor)) == USE
8815 || GET_CODE (PATTERN (anchor)) == CLOBBER
8816 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8817 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8820 anchor_attr = get_attr_pa_combine_type (anchor);
8821 /* See if anchor is an insn suitable for combination. */
8822 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8823 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8824 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8825 && ! forward_branch_p (anchor)))
8829 for (floater = PREV_INSN (anchor);
8831 floater = PREV_INSN (floater))
8833 if (GET_CODE (floater) == NOTE
8834 || (GET_CODE (floater) == INSN
8835 && (GET_CODE (PATTERN (floater)) == USE
8836 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8839 /* Anything except a regular INSN will stop our search. */
8840 if (GET_CODE (floater) != INSN
8841 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8842 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8848 /* See if FLOATER is suitable for combination with the anchor. */
8850 floater_attr = get_attr_pa_combine_type (floater);
8851 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8852 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8853 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8854 && floater_attr == PA_COMBINE_TYPE_FMPY))
8856 /* If ANCHOR and FLOATER can be combined, then we're
8857 done with this pass. */
8858 if (pa_can_combine_p (new, anchor, floater, 0,
8859 SET_DEST (PATTERN (floater)),
8860 XEXP (SET_SRC (PATTERN (floater)), 0),
8861 XEXP (SET_SRC (PATTERN (floater)), 1)))
8865 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8866 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8868 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8870 if (pa_can_combine_p (new, anchor, floater, 0,
8871 SET_DEST (PATTERN (floater)),
8872 XEXP (SET_SRC (PATTERN (floater)), 0),
8873 XEXP (SET_SRC (PATTERN (floater)), 1)))
8878 if (pa_can_combine_p (new, anchor, floater, 0,
8879 SET_DEST (PATTERN (floater)),
8880 SET_SRC (PATTERN (floater)),
8881 SET_SRC (PATTERN (floater))))
8887 /* If we didn't find anything on the backwards scan, try forwards. */
8889 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8890 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8892 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8894 if (GET_CODE (floater) == NOTE
8895 || (GET_CODE (floater) == INSN
8896 && (GET_CODE (PATTERN (floater)) == USE
8897 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8901 /* Anything except a regular INSN will stop our search. */
8902 if (GET_CODE (floater) != INSN
8903 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8904 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8910 /* See if FLOATER is suitable for combination with the anchor. */
8912 floater_attr = get_attr_pa_combine_type (floater);
8913 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8914 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8915 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8916 && floater_attr == PA_COMBINE_TYPE_FMPY))
8918 /* If ANCHOR and FLOATER can be combined, then we're
8919 done with this pass. */
8920 if (pa_can_combine_p (new, anchor, floater, 1,
8921 SET_DEST (PATTERN (floater)),
8922 XEXP (SET_SRC (PATTERN (floater)),
8924 XEXP (SET_SRC (PATTERN (floater)),
8931 /* FLOATER will be nonzero if we found a suitable floating
8932 insn for combination with ANCHOR. */
8934 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8935 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8937 /* Emit the new instruction and delete the old anchor. */
8938 emit_insn_before (gen_rtx_PARALLEL
8940 gen_rtvec (2, PATTERN (anchor),
8941 PATTERN (floater))),
8944 SET_INSN_DELETED (anchor);
8946 /* Emit a special USE insn for FLOATER, then delete
8947 the floating insn. */
8948 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8949 delete_insn (floater);
8954 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8957 /* Emit the new_jump instruction and delete the old anchor. */
8959 = emit_jump_insn_before (gen_rtx_PARALLEL
8961 gen_rtvec (2, PATTERN (anchor),
8962 PATTERN (floater))),
8965 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8966 SET_INSN_DELETED (anchor);
8968 /* Emit a special USE insn for FLOATER, then delete
8969 the floating insn. */
8970 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8971 delete_insn (floater);
8979 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8982 int insn_code_number;
8985 /* Create a PARALLEL with the patterns of ANCHOR and
8986 FLOATER, try to recognize it, then test constraints
8987 for the resulting pattern.
8989 If the pattern doesn't match or the constraints
8990 aren't met, keep searching for a suitable floater insn. */
8992 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8993 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8994 INSN_CODE (new) = -1;
8995 insn_code_number = recog_memoized (new);
8996 if (insn_code_number < 0
8997 || (extract_insn (new), ! constrain_operands (1)))
9011 /* There are up to three operands to consider: one
9012 output and two inputs.
9014 The output must not be used between FLOATER & ANCHOR
9015 exclusive. The inputs must not be set between
9016 FLOATER and ANCHOR exclusive. */
9018 if (reg_used_between_p (dest, start, end))
9021 if (reg_set_between_p (src1, start, end))
9024 if (reg_set_between_p (src2, start, end))
9027 /* If we get here, then everything is good. */
9031 /* Return nonzero if references for INSN are delayed.
9033 Millicode insns are actually function calls with some special
9034 constraints on arguments and register usage.
9036 Millicode calls always expect their arguments in the integer argument
9037 registers, and always return their result in %r29 (ret1). They
9038 are expected to clobber their arguments, %r1, %r29, and the return
9039 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9041 This function tells reorg that the references to arguments and
9042 millicode calls do not appear to happen until after the millicode call.
9043 This allows reorg to put insns which set the argument registers into the
9044 delay slot of the millicode call -- thus they act more like traditional CALL_INSNs.
9047 Note we cannot consider side effects of the insn to be delayed because
9048 the branch and link insn will clobber the return pointer. If we happened
9049 to use the return pointer in the delay slot of the call, then we lose.
9051 get_attr_type will try to recognize the given insn, so make sure to
9052 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns in particular. */
9055 insn_refs_are_delayed (rtx insn)
9057 return ((GET_CODE (insn) == INSN
9058 && GET_CODE (PATTERN (insn)) != SEQUENCE
9059 && GET_CODE (PATTERN (insn)) != USE
9060 && GET_CODE (PATTERN (insn)) != CLOBBER
9061 && get_attr_type (insn) == TYPE_MILLI));
9064 /* On the HP-PA the value is found in register(s) 28(-29), unless
9065 the mode is SF or DF. Then the value is returned in fr4 (32).
9067 This must perform the same promotions as PROMOTE_MODE, else
9068 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
9070 Small structures must be returned in a PARALLEL on PA64 in order
9071 to match the HP Compiler ABI. */
9074 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
9076 enum machine_mode valmode;
9078 if (AGGREGATE_TYPE_P (valtype)
9079 || TREE_CODE (valtype) == COMPLEX_TYPE
9080 || TREE_CODE (valtype) == VECTOR_TYPE)
9084 /* Aggregates with a size less than or equal to 128 bits are
9085 returned in GR 28(-29). They are left justified. The pad
9086 bits are undefined. Larger aggregates are returned in memory. */
9090 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9092 for (i = 0; i < ub; i++)
9094 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9095 gen_rtx_REG (DImode, 28 + i),
9100 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9102 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9104 /* Aggregates 5 to 8 bytes in size are returned in general
9105 registers r28-r29 in the same manner as other non
9106 floating-point objects. The data is right-justified and
9107 zero-extended to 64 bits. This is opposite to the normal
9108 justification used on big endian targets and requires
9109 special treatment. */
9110 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9111 gen_rtx_REG (DImode, 28), const0_rtx);
9112 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9116 if ((INTEGRAL_TYPE_P (valtype)
9117 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9118 || POINTER_TYPE_P (valtype))
9119 valmode = word_mode;
9121 valmode = TYPE_MODE (valtype);
9123 if (TREE_CODE (valtype) == REAL_TYPE
9124 && !AGGREGATE_TYPE_P (valtype)
9125 && TYPE_MODE (valtype) != TFmode
9126 && !TARGET_SOFT_FLOAT)
9127 return gen_rtx_REG (valmode, 32);
9129 return gen_rtx_REG (valmode, 28);
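/* Worked examples (illustrative): a 16-byte struct returned under the
   PA64 HP-ld ABI yields a PARALLEL of two DImode pieces in %r28 and %r29
   at byte offsets 0 and 8; a plain "double" (without soft-float) comes
   back in fr4 (hard register 32); other integral or pointer values are
   promoted to word_mode and returned in %r28, as the code above shows.  */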
9132 /* Return the location of a parameter that is passed in a register or NULL
9133 if the parameter has any component that is passed in memory.
9135 This is new code and will be pushed into the net sources after further testing.
9138 ??? We might want to restructure this so that it looks more like other ports. */
9141 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9142 int named ATTRIBUTE_UNUSED)
9144 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9151 if (mode == VOIDmode)
9154 arg_size = FUNCTION_ARG_SIZE (mode, type);
9156 /* If this arg would be passed partially or totally on the stack, then
9157 this routine should return zero. pa_arg_partial_bytes will
9158 handle arguments which are split between regs and stack slots if
9159 the ABI mandates split arguments. */
9162 /* The 32-bit ABI does not split arguments. */
9163 if (cum->words + arg_size > max_arg_words)
9169 alignment = cum->words & 1;
9170 if (cum->words + alignment >= max_arg_words)
9174 /* The 32bit ABIs and the 64bit ABIs are rather different,
9175 particularly in their handling of FP registers. We might
9176 be able to cleverly share code between them, but I'm not
9177 going to bother in the hope that splitting them up results
9178 in code that is more easily understood. */
9182 /* Advance the base registers to their current locations.
9184 Remember, gprs grow towards smaller register numbers while
9185 fprs grow to higher register numbers. Also remember that
9186 although FP regs are 32-bit addressable, we pretend that
9187 the registers are 64-bits wide. */
9188 gpr_reg_base = 26 - cum->words;
9189 fpr_reg_base = 32 + cum->words;
9191 /* Arguments wider than one word and small aggregates need special treatment. */
9195 || (type && (AGGREGATE_TYPE_P (type)
9196 || TREE_CODE (type) == COMPLEX_TYPE
9197 || TREE_CODE (type) == VECTOR_TYPE)))
9199 /* Double-extended precision (80-bit), quad-precision (128-bit)
9200 and aggregates including complex numbers are aligned on
9201 128-bit boundaries. The first eight 64-bit argument slots
9202 are associated one-to-one, with general registers r26
9203 through r19, and also with floating-point registers fr4
9204 through fr11. Arguments larger than one word are always
9205 passed in general registers.
9207 Using a PARALLEL with a word mode register results in left
9208 justified data on a big-endian target. */
9211 int i, offset = 0, ub = arg_size;
9213 /* Align the base register. */
9214 gpr_reg_base -= alignment;
9216 ub = MIN (ub, max_arg_words - cum->words - alignment);
9217 for (i = 0; i < ub; i++)
9219 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9220 gen_rtx_REG (DImode, gpr_reg_base),
9226 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9231 /* If the argument is larger than a word, then we know precisely
9232 which registers we must use. */
9246 /* Structures 5 to 8 bytes in size are passed in the general
9247 registers in the same manner as other non floating-point
9248 objects. The data is right-justified and zero-extended
9249 to 64 bits. This is opposite to the normal justification
9250 used on big endian targets and requires special treatment.
9251 We now define BLOCK_REG_PADDING to pad these objects.
9252 Aggregates, complex and vector types are passed in the same
9253 manner as structures. */
9255 || (type && (AGGREGATE_TYPE_P (type)
9256 || TREE_CODE (type) == COMPLEX_TYPE
9257 || TREE_CODE (type) == VECTOR_TYPE)))
9259 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9260 gen_rtx_REG (DImode, gpr_reg_base),
9262 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9267 /* We have a single word (32 bits). A simple computation
9268 will get us the register #s we need. */
9269 gpr_reg_base = 26 - cum->words;
9270 fpr_reg_base = 32 + 2 * cum->words;
9274 /* Determine if the argument needs to be passed in both general and
9275 floating point registers. */
9276 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9277 /* If we are doing soft-float with portable runtime, then there
9278 is no need to worry about FP regs. */
9279 && !TARGET_SOFT_FLOAT
9280 /* The parameter must be some kind of scalar float, else we just
9281 pass it in integer registers. */
9282 && GET_MODE_CLASS (mode) == MODE_FLOAT
9283 /* The target function must not have a prototype. */
9284 && cum->nargs_prototype <= 0
9285 /* libcalls do not need to pass items in both FP and general registers. */
9287 && type != NULL_TREE
9288 /* All this hair applies to "outgoing" args only. This includes
9289 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9291 /* Also pass outgoing floating arguments in both registers in indirect
9292 calls with the 32 bit ABI and the HP assembler since there is no
9293 way to specify argument locations in static functions.
9298 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9304 gen_rtx_EXPR_LIST (VOIDmode,
9305 gen_rtx_REG (mode, fpr_reg_base),
9307 gen_rtx_EXPR_LIST (VOIDmode,
9308 gen_rtx_REG (mode, gpr_reg_base),
9313 /* See if we should pass this parameter in a general register. */
9314 if (TARGET_SOFT_FLOAT
9315 /* Indirect calls in the normal 32bit ABI require all arguments
9316 to be passed in general registers. */
9317 || (!TARGET_PORTABLE_RUNTIME
9321 /* If the parameter is not a scalar floating-point parameter,
9322 then it belongs in GPRs. */
9323 || GET_MODE_CLASS (mode) != MODE_FLOAT
9324 /* Structure with single SFmode field belongs in GPR. */
9325 || (type && AGGREGATE_TYPE_P (type)))
9326 retval = gen_rtx_REG (mode, gpr_reg_base);
9328 retval = gen_rtx_REG (mode, fpr_reg_base);
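/* A quick worked example (purely illustrative): with the 32-bit ABI,
   gpr_reg_base = 26 - cum->words, so the first four argument words map to
   %r26, %r25, %r24 and %r23 in that order, while fpr_reg_base advances by
   two hard registers per argument word as computed above.  */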
9334 /* If this arg would be passed totally in registers or totally on the stack,
9335 then this routine should return zero. */
9338 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9339 tree type, bool named ATTRIBUTE_UNUSED)
9341 unsigned int max_arg_words = 8;
9342 unsigned int offset = 0;
9347 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9350 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9351 /* Arg fits fully into registers. */
9353 else if (cum->words + offset >= max_arg_words)
9354 /* Arg fully on the stack. */
9358 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
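/* Worked example (illustrative): with the 64-bit ABI max_arg_words is 8,
   so a three-word argument starting at cum->words == 6 with no padding
   (offset 0) is split: 6 + 0 + 3 > 8 but 6 + 0 < 8, and the function
   reports (8 - 6 - 0) * UNITS_PER_WORD = 16 bytes passed in registers,
   with the remaining word going to the stack.  */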
9362 /* A get_unnamed_section callback for switching to the text section.
9364 This function is only used with SOM. Because we don't support
9365 named subspaces, we can only create a new subspace or switch back
9366 to the default text subspace. */
9369 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9371 gcc_assert (TARGET_SOM);
9374 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9376 /* We only want to emit a .nsubspa directive once at the
9377 start of the function. */
9378 cfun->machine->in_nsubspa = 1;
9380 /* Create a new subspace for the text. This provides
9381 better stub placement and one-only functions. */
9383 && DECL_ONE_ONLY (cfun->decl)
9384 && !DECL_WEAK (cfun->decl))
9386 output_section_asm_op ("\t.SPACE $TEXT$\n"
9387 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9388 "ACCESS=44,SORT=24,COMDAT");
9394 /* There isn't a current function or the body of the current
9395 function has been completed. So, we are changing to the
9396 text section to output debugging information. Thus, we
9397 need to forget that we are in the text section so that
9398 varasm.c will call us when text_section is selected again. */
9399 gcc_assert (!cfun || !cfun->machine
9400 || cfun->machine->in_nsubspa == 2);
9403 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9406 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9409 /* A get_unnamed_section callback for switching to comdat data
9410 sections. This function is only used with SOM. */
9413 som_output_comdat_data_section_asm_op (const void *data)
9416 output_section_asm_op (data);
9419 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9422 pa_som_asm_init_sections (void)
9425 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9427 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9428 is not being generated. */
9429 som_readonly_data_section
9430 = get_unnamed_section (0, output_section_asm_op,
9431 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9433 /* When secondary definitions are not supported, SOM makes readonly
9434 data one-only by creating a new $LIT$ subspace in $TEXT$ with the comdat flag.
9436 som_one_only_readonly_data_section
9437 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9439 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9440 "ACCESS=0x2c,SORT=16,COMDAT");
9443 /* When secondary definitions are not supported, SOM makes data one-only
9444 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9445 som_one_only_data_section
9446 = get_unnamed_section (SECTION_WRITE,
9447 som_output_comdat_data_section_asm_op,
9448 "\t.SPACE $PRIVATE$\n"
9449 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9450 "ACCESS=31,SORT=24,COMDAT");
9452 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9453 which reference data within the $TEXT$ space (for example constant
9454 strings in the $LIT$ subspace).
9456 The assemblers (GAS and HP as) both have problems with handling
9457 the difference of two symbols which is the other correct way to
9458 reference constant data during PIC code generation.
9460 So, there's no way to reference constant data which is in the
9461 $TEXT$ space during PIC generation. Instead place all constant
9462 data into the $PRIVATE$ subspace (this reduces sharing, but it
9463 works correctly). */
9464 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9466 /* We must not have a reference to an external symbol defined in a
9467 shared library in a readonly section, else the SOM linker will hang.
9470 So, we force exception information into the data section. */
9471 exception_section = data_section;
9474 /* On hpux10, the linker will give an error if we have a reference
9475 in the read-only data section to a symbol defined in a shared
9476 library. Therefore, expressions that might require a reloc can
9477 not be placed in the read-only data section. */
9480 pa_select_section (tree exp, int reloc,
9481 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9483 if (TREE_CODE (exp) == VAR_DECL
9484 && TREE_READONLY (exp)
9485 && !TREE_THIS_VOLATILE (exp)
9486 && DECL_INITIAL (exp)
9487 && (DECL_INITIAL (exp) == error_mark_node
9488 || TREE_CONSTANT (DECL_INITIAL (exp)))
9492 && DECL_ONE_ONLY (exp)
9493 && !DECL_WEAK (exp))
9494 return som_one_only_readonly_data_section;
9496 return readonly_data_section;
9498 else if (CONSTANT_CLASS_P (exp) && !reloc)
9499 return readonly_data_section;
9501 && TREE_CODE (exp) == VAR_DECL
9502 && DECL_ONE_ONLY (exp)
9503 && !DECL_WEAK (exp))
9504 return som_one_only_data_section;
9506 return data_section;
9510 pa_globalize_label (FILE *stream, const char *name)
9512 /* We only handle DATA objects here; functions are globalized in
9513 ASM_DECLARE_FUNCTION_NAME. */
9514 if (! FUNCTION_NAME_P (name))
9516 fputs ("\t.EXPORT ", stream);
9517 assemble_name (stream, name);
9518 fputs (",DATA\n", stream);
9522 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9525 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9526 int incoming ATTRIBUTE_UNUSED)
9528 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9531 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9534 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9536 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9537 PA64 ABI says that objects larger than 128 bits are returned in memory.
9538 Note, int_size_in_bytes can return -1 if the size of the object is
9539 variable or larger than the maximum value that can be expressed as
9540 a HOST_WIDE_INT. It can also return zero for an empty type. The
9541 simplest way to handle variable and empty types is to pass them in
9542 memory. This avoids problems in defining the boundaries of argument
9543 slots, allocating registers, etc. */
9544 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9545 || int_size_in_bytes (type) <= 0);
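/* For illustration (the struct types are hypothetical): the SOM limit is
   8 bytes and the PA64 limit is 16, so

       struct s8  { int a, b; };        registers on both ABIs,
       struct s12 { int a, b, c; };     memory on SOM, registers on PA64,
       struct s24 { double a, b, c; };  memory on both,

   while variable-sized and empty types report a non-positive size and are
   always forced into memory, as explained above.  */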
9548 /* Structure to hold declaration and name of external symbols that are
9549 emitted by GCC. We generate a vector of these symbols and output them
9550 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9551 This avoids putting out names that are never really used. */
9553 typedef struct extern_symbol GTY(())
9559 /* Define gc'd vector type for extern_symbol. */
9560 DEF_VEC_O(extern_symbol);
9561 DEF_VEC_ALLOC_O(extern_symbol,gc);
9563 /* Vector of extern_symbol pointers. */
9564 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9566 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9567 /* Mark DECL (name NAME) as an external reference (assembler output
9568 file FILE). This saves the names to output at the end of the file
9569 if actually referenced. */
9572 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9574 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9576 gcc_assert (file == asm_out_file);
9581 /* Output text required at the end of an assembler file.
9582 This includes deferred plabels and .import directives for
9583 all external symbols that were actually referenced. */
9586 pa_hpux_file_end (void)
9591 if (!NO_DEFERRED_PROFILE_COUNTERS)
9592 output_deferred_profile_counters ();
9594 output_deferred_plabels ();
9596 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9598 tree decl = p->decl;
9600 if (!TREE_ASM_WRITTEN (decl)
9601 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9602 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9605 VEC_free (extern_symbol, gc, extern_symbols);
9609 /* Return true if a change from mode FROM to mode TO for a register
9610 in register class CLASS is invalid. */
9613 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9614 enum reg_class class)
9619 /* Reject changes to/from complex and vector modes. */
9620 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9621 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9624 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9627 /* There is no way to load QImode or HImode values directly from
9628 memory. SImode loads to the FP registers are not zero extended.
9629 On the 64-bit target, this conflicts with the definition of
9630 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9631 with different sizes in the floating-point registers. */
9632 if (MAYBE_FP_REG_CLASS_P (class))
9635 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9636 in specific sets of registers. Thus, we cannot allow changing
9637 to a larger mode when it's larger than a word. */
9638 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9639 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9645 /* Returns TRUE if it is a good idea to tie two pseudo registers
9646 when one has mode MODE1 and one has mode MODE2.
9647 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9648 for any hard reg, then this must be FALSE for correct output.
9650 We should return FALSE for QImode and HImode because these modes
9651 are not ok in the floating-point registers. However, this prevents
9652 tying these modes to SImode and DImode in the general registers.
9653 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9654 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9655 in the floating-point registers. */
9658 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9660 /* Don't tie modes in different classes. */
9661 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))