/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "tree.h"
#include "output.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "recog.h"
#include "target.h"
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
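
/* Editorial note (ours, not from the original sources): this predicate
   models store forwarding for the scheduler -- the result of an FP
   operation can feed a same-width FP store with reduced latency -- and
   our reading is that it is referenced from the define_bypass clauses
   in pa.md rather than called from this file.  */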
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					enum machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);
static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
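
/* Usage note (ours): as the comment above says, kernel-mode builds pass
   something like -mfixed-range=fr4-fr31 so the compiler never touches
   the upper FP registers; once every register in
   FP_REG_FIRST..FP_REG_LAST is fixed, the FPU is disabled outright via
   MASK_DISABLE_FPREGS.  */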
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mpa_risc_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_mpa_risc_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || ldil_cint_p (ival)
	  || zdepi_cint_p (ival));
}
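
/* Illustrative values (ours, not from the original sources): 8191 fits
   the signed 14-bit ldo/ldi immediate; 0x12345800 works for ldil since
   its low 11 bits are zero; 0xf8 is a single run of ones that zdepi can
   deposit.  Something like 0x12345678 satisfies none of the three tests
   and needs a two-instruction ldil/ldo sequence instead.  */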
/* Return truth value of whether OP can be used as an operand in a
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}
/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
	   && REG_P (op)
	   && REGNO (op) >= FIRST_PSEUDO_REGISTER
	   && reg_renumber [REGNO (op)] < 0)
	  || (GET_CODE (op) == MEM
	      && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
	      && !symbolic_memory_operand (op, VOIDmode)
	      && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
	      && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
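
/* Example (ours): for ival = 0x12345800 the masked value is zero, since
   the low 11 bits are clear and bit 31 is not set, so ldil can load it;
   for ival = 0x12345678 the mask leaves 0x678 and the test fails.  */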
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
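
/* Worked example (ours): for x = 0xf8 (11111000 in binary), lsb_mask is
   0x8 and t = ((0xf8 >> 4) + 0x8) & ~0x7 = 0x10, a power of two, so the
   test succeeds: zdepi can deposit the 5-bit field 11111 shifted left by
   three.  A value like 0x101, whose set bits cannot come from a
   sign-extended 5-bit field, leaves t = 0x11 and is rejected.  */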
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
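
/* Both predicates rely on the same trick (example values are ours):
   adding the least significant set bit collapses a single contiguous
   run of ones into one carry bit, so the sum is a power of two exactly
   when the ones formed one run.  For ior_mask_p with MASK = 0x00000ff0,
   MASK & -MASK = 0x10 and MASK + 0x10 = 0x1000, which has a single bit
   set, so depi can set those bits in one instruction.  and_mask_p first
   complements MASK so the same test runs on the cleared bits.  */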
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
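
/* For reference (our sketch of the usual output, not text from the
   original file): with SOM PIC the moves emitted above typically
   assemble to

	addil LT'sym,%r19	; %r1 = %r19 + left part of DLT offset
	ldw RT'sym(%r1),%reg	; load the DLT entry for sym

   where %r19 holds pic_offset_table_rtx and the UNSPEC_DLTIND14R
   wrapper keeps this LO_SUM from being treated as an ordinary symbolic
   address.  */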
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}
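
/* Note (ours): the dynamic models funnel through hppa_tls_call, i.e. a
   call to __tls_get_addr, while the exec models read the thread pointer
   directly via gen_tp_load -- on PA the thread pointer is conventionally
   kept in control register %cr27, so that pattern amounts to a mfctl.  */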
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
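
/* Worked instance of the rounding rule (our numbers): with the MODE_INT
   mask 0x3fff and <large int> = 0x17ff3, (0x17ff3 & 0x3fff) = 0x3ff3 is
   >= 0x2000, so we round up to Y = 0x18000; the residual displacement
   0x17ff3 - 0x18000 = -13 then fits comfortably in 14 bits.  */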
static rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }
  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }
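
/* E.g. (our illustration) an address like (plus (mult r5 8) r4) is
   reassociated here so the shift-and-add patterns can emit something
   like "sh3add %r5,%r4,%rT", leaving T usable as a base register.  */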
  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case CONST:
      return 2;
    case SYMBOL_REF:
      return 4;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
		bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
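
/* Our gloss on the numbers above: integer multiplies cost 8 insns on
   PA 1.1 because they go through the FP unit (xmpyu), and 20 when the
   FPU is unavailable and a millicode call must be made instead; integer
   divides and remainders are likewise millicode calls, hence the flat
   60-insn estimate.  */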
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
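
/* As the target macros above set up, pa_tls_referenced_p also serves as
   TARGET_CANNOT_FORCE_CONST_MEM, so addresses mentioning TLS symbols
   are never spilled into the constant pool.  */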
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */
int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  rtx tem;
  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
      operands[0] = operand0;
    }
  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }
  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);
  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
					  Pmode,
					  XEXP (XEXP (operand0, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     operand0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);

      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  */
	      if (decl)
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  type = strip_array_types (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }
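
  /* Note on the DFmode zero case above (our comment): PA store
     instructions only take register sources, so a 0.0 store is split
     into a register clear followed by a plain register store, which
     the normal move patterns can then handle.  */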
  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }
	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);

	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);
	    }
	  return 1;
	}
      else if (pa_tls_referenced_p (operand1))
	{
	  rtx tmp = operand1;
	  rtx addend = NULL;

	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	    {
	      addend = XEXP (XEXP (tmp, 0), 1);
	      tmp = XEXP (XEXP (tmp, 0), 0);
	    }

	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
	  tmp = legitimize_tls_address (tmp);
	  if (addend)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, addend);
	      tmp = force_operand (tmp, operands[0]);
	    }
	  operands[1] = tmp;
	}
1964 else if (GET_CODE (operand1) != CONST_INT
1965 || !cint_ok_for_move (INTVAL (operand1)))
1969 HOST_WIDE_INT value = 0;
1970 HOST_WIDE_INT insv = 0;
1973 if (GET_CODE (operand1) == CONST_INT)
1974 value = INTVAL (operand1);
1977 && GET_CODE (operand1) == CONST_INT
1978 && HOST_BITS_PER_WIDE_INT > 32
1979 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1983 /* Extract the low order 32 bits of the value and sign extend.
1984 If the new value is the same as the original value, we can
1985 can use the original value as-is. If the new value is
1986 different, we use it and insert the most-significant 32-bits
1987 of the original value into the final result. */
1988 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1989 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1992 #if HOST_BITS_PER_WIDE_INT > 32
1993 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1997 operand1 = GEN_INT (nval);
2001 if (reload_in_progress || reload_completed)
2002 temp = scratch_reg ? scratch_reg : operand0;
2004 temp = gen_reg_rtx (mode);
2006 /* We don't directly split DImode constants on 32-bit targets
2007 because PLUS uses an 11-bit immediate and the insn sequence
2008 generated is not as efficient as the one using HIGH/LO_SUM. */
2009 if (GET_CODE (operand1) == CONST_INT
2010 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2011 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2014 /* Directly break constant into high and low parts. This
2015 provides better optimization opportunities because various
2016 passes recognize constants split with PLUS but not LO_SUM.
2017 We use a 14-bit signed low part except when the addition
2018 of 0x4000 to the high part might change the sign of the
2020 HOST_WIDE_INT low = value & 0x3fff;
2021 HOST_WIDE_INT high = value & ~ 0x3fff;
2025 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2033 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2034 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
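/* Illustrative example: value = 0x12345678 splits into
   high = 0x12344000 and low = 0x1678 (low < 0x2000, so no sign
   adjustment is needed), i.e. an "ldil" of the high part followed by
   an "ldo 0x1678(temp)" style addition. */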
2038 emit_insn (gen_rtx_SET (VOIDmode, temp,
2039 gen_rtx_HIGH (mode, operand1)));
2040 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2043 insn = emit_move_insn (operands[0], operands[1]);
2045 /* Now insert the most significant 32 bits of the value
2046 into the register. When we don't have a second register
2047 available, it could take up to nine instructions to load
2048 a 64-bit integer constant. Prior to reload, we force
2049 constants that would take more than three instructions
2050 to load to the constant pool. During and after reload,
2051 we have to handle all possible values. */
2054 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2055 register and the value to be inserted is outside the
2056 range that can be loaded with three depdi instructions. */
2057 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2059 operand1 = GEN_INT (insv);
2061 emit_insn (gen_rtx_SET (VOIDmode, temp,
2062 gen_rtx_HIGH (mode, operand1)));
2063 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2064 emit_insn (gen_insv (operand0, GEN_INT (32),
2069 int len = 5, pos = 27;
2071 /* Insert the bits using the depdi instruction. */
2074 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2075 HOST_WIDE_INT sign = v5 < 0;
2077 /* Left extend the insertion. */
2078 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2079 while (pos > 0 && (insv & 1) == sign)
2081 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
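/* Bits that match the sign of the 5-bit immediate just extracted are
   folded into the same deposit by widening LEN: depdi sign-extends
   its immediate across the whole field, so those bits come for
   free. */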
2086 emit_insn (gen_insv (operand0, GEN_INT (len),
2087 GEN_INT (pos), GEN_INT (v5)));
2089 len = pos > 0 && pos < 5 ? pos : 5;
2095 set_unique_reg_note (insn, REG_EQUAL, op1);
2100 /* Now have insn-emit do whatever it normally does. */
2104 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2105 it will need a link/runtime reloc). */
2108 reloc_needed (tree exp)
2112 switch (TREE_CODE (exp))
2117 case POINTER_PLUS_EXPR:
2120 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2121 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2125 case NON_LVALUE_EXPR:
2126 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2132 unsigned HOST_WIDE_INT ix;
2134 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2136 reloc |= reloc_needed (value);
2149 /* Does operand (which is a symbolic_operand) live in text space?
2150 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info, will be true. */
2154 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2156 if (GET_CODE (operand) == CONST)
2157 operand = XEXP (XEXP (operand, 0), 0);
2160 if (GET_CODE (operand) == SYMBOL_REF)
2161 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2165 if (GET_CODE (operand) == SYMBOL_REF)
2166 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2172 /* Return the best assembler insn template
2173 for moving operands[1] into operands[0] as a fullword. */
2175 singlemove_string (rtx *operands)
2177 HOST_WIDE_INT intval;
2179 if (GET_CODE (operands[0]) == MEM)
2180 return "stw %r1,%0";
2181 if (GET_CODE (operands[1]) == MEM)
2182 return "ldw %1,%0";
2183 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2188 gcc_assert (GET_MODE (operands[1]) == SFmode);
2190 /* Translate the CONST_DOUBLE to a CONST_INT with the same target bit pattern. */
2192 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2193 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2195 operands[1] = GEN_INT (i);
2196 /* Fall through to CONST_INT case. */
2198 if (GET_CODE (operands[1]) == CONST_INT)
2200 intval = INTVAL (operands[1]);
2202 if (VAL_14_BITS_P (intval))
2203 return "ldi %1,%0";
2204 else if ((intval & 0x7ff) == 0)
2205 return "ldil L'%1,%0";
2206 else if (zdepi_cint_p (intval))
2207 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2209 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2211 return "copy %1,%0";
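/* Illustrative constants for the cases above (32-bit word): 42 is a
   14-bit immediate and uses "ldi"; 0x12345800 has zero low 11 bits and
   fits a single "ldil"; 0x3c00 is one contiguous bit block and uses
   "zdepi/depwi,z"; a general constant such as 0x12345678 needs the
   "ldil"/"ldo" pair. */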
2215 /* Compute position (in OP[1]) and width (in OP[2])
2216 useful for copying IMM to a register using the zdepi
2217 instructions. Store the immediate value to insert in OP[0]. */
2219 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2223 /* Find the least significant set bit in IMM. */
2224 for (lsb = 0; lsb < 32; lsb++)
2231 /* Choose variants based on *sign* of the 5-bit field. */
2232 if ((imm & 0x10) == 0)
2233 len = (lsb <= 28) ? 4 : 32 - lsb;
2236 /* Find the width of the bitstring in IMM. */
2237 for (len = 5; len < 32 - lsb; len++)
2239 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2243 /* Sign extend IMM as a 5-bit value. */
2244 imm = (imm & 0xf) - 0x10;
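/* Worked example (illustrative): IMM = 0x00070000. The first loop
   shifts IMM right by lsb = 16 leaving 7; bit 4 of that field is
   clear, so the value is non-negative and len = 4. The operands then
   describe a "zdepi/depwi,z" that deposits 7 as a 4-bit field 16 bits
   up from the least significant bit. */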
2252 /* Compute position (in OP[1]) and width (in OP[2])
2253 useful for copying IMM to a register using the depdi,z
2254 instructions. Store the immediate value to insert in OP[0]. */
2256 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2258 int lsb, len, maxlen;
2260 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2262 /* Find the least significant set bit in IMM. */
2263 for (lsb = 0; lsb < maxlen; lsb++)
2270 /* Choose variants based on *sign* of the 5-bit field. */
2271 if ((imm & 0x10) == 0)
2272 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2275 /* Find the width of the bitstring in IMM. */
2276 for (len = 5; len < maxlen - lsb; len++)
2278 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2282 /* Extend length if host is narrow and IMM is negative. */
2283 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2286 /* Sign extend IMM as a 5-bit value. */
2287 imm = (imm & 0xf) - 0x10;
2295 /* Output assembler code to perform a doubleword move insn
2296 with operands OPERANDS. */
2299 output_move_double (rtx *operands)
2301 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2303 rtx addreg0 = 0, addreg1 = 0;
2305 /* First classify both operands. */
2307 if (REG_P (operands[0]))
2309 else if (offsettable_memref_p (operands[0]))
2311 else if (GET_CODE (operands[0]) == MEM)
2316 if (REG_P (operands[1]))
2318 else if (CONSTANT_P (operands[1]))
2320 else if (offsettable_memref_p (operands[1]))
2322 else if (GET_CODE (operands[1]) == MEM)
2327 /* Check for cases that the operand constraints are not
2328 supposed to allow. */
2329 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2331 /* Handle copies between general and floating registers. */
2333 if (optype0 == REGOP && optype1 == REGOP
2334 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2336 if (FP_REG_P (operands[0]))
2338 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2339 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2340 return "{fldds|fldd} -16(%%sp),%0";
2344 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2345 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2346 return "{ldws|ldw} -12(%%sp),%R0";
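/* The PA has no direct general<->floating register copy, so the value
   takes a round trip through the scratch memory just below the stack
   pointer (the -16(%sp) slot used above). */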
2350 /* Handle auto decrementing and incrementing loads and stores
2351 specifically, since the structure of the function doesn't work
2352 for them without major modification. Do it better when we teach
2353 this port about the general inc/dec addressing of the PA.
2354 (This was written by tege. Chide him if it doesn't work.) */
2356 if (optype0 == MEMOP)
2358 /* We have to output the address syntax ourselves, since print_operand
2359 doesn't deal with the addresses we want to use. Fix this later. */
2361 rtx addr = XEXP (operands[0], 0);
2362 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2364 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2366 operands[0] = XEXP (addr, 0);
2367 gcc_assert (GET_CODE (operands[1]) == REG
2368 && GET_CODE (operands[0]) == REG);
2370 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2372 /* No overlap between high target register and address
2373 register. (We do this in a non-obvious way to
2374 save a register file writeback) */
2375 if (GET_CODE (addr) == POST_INC)
2376 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2377 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2379 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2381 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2383 operands[0] = XEXP (addr, 0);
2384 gcc_assert (GET_CODE (operands[1]) == REG
2385 && GET_CODE (operands[0]) == REG);
2387 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2388 /* No overlap between high target register and address
2389 register. (We do this in a non-obvious way to save a
2390 register file writeback) */
2391 if (GET_CODE (addr) == PRE_INC)
2392 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2393 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
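/* The second offset compensates for the pointer update done by the
   first store: e.g. in the POST_INC case above, "stw,ma %1,8(%0)"
   stores the high word and bumps the pointer by 8, so the low word
   then lands at -4 relative to the updated pointer. */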
2396 if (optype1 == MEMOP)
2398 /* We have to output the address syntax ourselves, since print_operand
2399 doesn't deal with the addresses we want to use. Fix this later. */
2401 rtx addr = XEXP (operands[1], 0);
2402 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2404 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2406 operands[1] = XEXP (addr, 0);
2407 gcc_assert (GET_CODE (operands[0]) == REG
2408 && GET_CODE (operands[1]) == REG);
2410 if (!reg_overlap_mentioned_p (high_reg, addr))
2412 /* No overlap between high target register and address
2413 register. (We do this in a non-obvious way to
2414 save a register file writeback) */
2415 if (GET_CODE (addr) == POST_INC)
2416 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2417 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2421 /* This is an undefined situation. We should load into the
2422 address register *and* update that register. Probably
2423 we don't need to handle this at all. */
2424 if (GET_CODE (addr) == POST_INC)
2425 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2426 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2429 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2431 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2433 operands[1] = XEXP (addr, 0);
2434 gcc_assert (GET_CODE (operands[0]) == REG
2435 && GET_CODE (operands[1]) == REG);
2437 if (!reg_overlap_mentioned_p (high_reg, addr))
2439 /* No overlap between high target register and address
2440 register. (We do this in a non-obvious way to
2441 save a register file writeback) */
2442 if (GET_CODE (addr) == PRE_INC)
2443 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2444 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2448 /* This is an undefined situation. We should load into the
2449 address register *and* update that register. Probably
2450 we don't need to handle this at all. */
2451 if (GET_CODE (addr) == PRE_INC)
2452 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2453 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2456 else if (GET_CODE (addr) == PLUS
2457 && GET_CODE (XEXP (addr, 0)) == MULT)
2460 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2462 if (!reg_overlap_mentioned_p (high_reg, addr))
2464 xoperands[0] = high_reg;
2465 xoperands[1] = XEXP (addr, 1);
2466 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2467 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2468 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2470 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2474 xoperands[0] = high_reg;
2475 xoperands[1] = XEXP (addr, 1);
2476 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2477 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2478 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2480 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2485 /* If an operand is an unoffsettable memory ref, find a register
2486 we can increment temporarily to make it refer to the second word. */
2488 if (optype0 == MEMOP)
2489 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2491 if (optype1 == MEMOP)
2492 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2494 /* Ok, we can do one word at a time.
2495 Normally we do the low-numbered word first.
2497 In either case, set up in LATEHALF the operands to use
2498 for the high-numbered word and in some cases alter the
2499 operands in OPERANDS to be suitable for the low-numbered word. */
2501 if (optype0 == REGOP)
2502 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2503 else if (optype0 == OFFSOP)
2504 latehalf[0] = adjust_address (operands[0], SImode, 4);
2506 latehalf[0] = operands[0];
2508 if (optype1 == REGOP)
2509 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2510 else if (optype1 == OFFSOP)
2511 latehalf[1] = adjust_address (operands[1], SImode, 4);
2512 else if (optype1 == CNSTOP)
2513 split_double (operands[1], &operands[1], &latehalf[1]);
2515 latehalf[1] = operands[1];
2517 /* If the first move would clobber the source of the second one,
2518 do them in the other order.
2520 This can happen in two cases:
2522 mem -> register where the first half of the destination register
2523 is the same register used in the memory's address. Reload
2524 can create such insns.
2526 mem in this case will be either register indirect or register
2527 indirect plus a valid offset.
2529 register -> register move where REGNO(dst) == REGNO(src + 1)
2530 someone (Tim/Tege?) claimed this can happen for parameter loads.
2532 Handle mem -> register case first. */
2533 if (optype0 == REGOP
2534 && (optype1 == MEMOP || optype1 == OFFSOP)
2535 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2538 /* Do the late half first. */
2540 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2541 output_asm_insn (singlemove_string (latehalf), latehalf);
2545 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2546 return singlemove_string (operands);
2549 /* Now handle register -> register case. */
2550 if (optype0 == REGOP && optype1 == REGOP
2551 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2553 output_asm_insn (singlemove_string (latehalf), latehalf);
2554 return singlemove_string (operands);
2557 /* Normal case: do the two words, low-numbered first. */
2559 output_asm_insn (singlemove_string (operands), operands);
2561 /* Make any unoffsettable addresses point at high-numbered word. */
2563 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2565 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2568 output_asm_insn (singlemove_string (latehalf), latehalf);
2570 /* Undo the adds we just did. */
2572 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2574 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2580 output_fp_move_double (rtx *operands)
2582 if (FP_REG_P (operands[0]))
2584 if (FP_REG_P (operands[1])
2585 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2586 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2588 output_asm_insn ("fldd%F1 %1,%0", operands);
2590 else if (FP_REG_P (operands[1]))
2592 output_asm_insn ("fstd%F0 %1,%0", operands);
2598 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2600 /* This is a pain. You have to be prepared to deal with an
2601 arbitrary address here including pre/post increment/decrement,
2603 so avoid this in the MD. */
2604 gcc_assert (GET_CODE (operands[0]) == REG);
2606 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2607 xoperands[0] = operands[0];
2608 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2613 /* Return a REG that occurs in ADDR with coefficient 1.
2614 ADDR can be effectively incremented by incrementing REG. */
2617 find_addr_reg (rtx addr)
2619 while (GET_CODE (addr) == PLUS)
2621 if (GET_CODE (XEXP (addr, 0)) == REG)
2622 addr = XEXP (addr, 0);
2623 else if (GET_CODE (XEXP (addr, 1)) == REG)
2624 addr = XEXP (addr, 1);
2625 else if (CONSTANT_P (XEXP (addr, 0)))
2626 addr = XEXP (addr, 1);
2627 else if (CONSTANT_P (XEXP (addr, 1)))
2628 addr = XEXP (addr, 0);
2632 gcc_assert (GET_CODE (addr) == REG);
2636 /* Emit code to perform a block move.
2638 OPERANDS[0] is the destination pointer as a REG, clobbered.
2639 OPERANDS[1] is the source pointer as a REG, clobbered.
2640 OPERANDS[2] is a register for temporary storage.
2641 OPERANDS[3] is a register for temporary storage.
2642 OPERANDS[4] is the size as a CONST_INT
2643 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2644 OPERANDS[6] is another temporary register. */
2647 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2649 int align = INTVAL (operands[5]);
2650 unsigned long n_bytes = INTVAL (operands[4]);
2652 /* We can't move more than a word at a time because the PA
2653 has no integer move insns longer than a word. (Could use fp mem ops?) */
2654 if (align > (TARGET_64BIT ? 8 : 4))
2655 align = (TARGET_64BIT ? 8 : 4);
2657 /* Note that we know each loop below will execute at least twice
2658 (else we would have open-coded the copy). */
2662 /* Pre-adjust the loop counter. */
2663 operands[4] = GEN_INT (n_bytes - 16);
2664 output_asm_insn ("ldi %4,%2", operands);
2667 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2668 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2669 output_asm_insn ("std,ma %3,8(%0)", operands);
2670 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2671 output_asm_insn ("std,ma %6,8(%0)", operands);
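/* The copying loop is four insns: "addib,>=" decrements the count and
   branches back 12 bytes to the first "ldd,ma", while the trailing
   "std,ma" executes in the branch's delay slot. */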
2673 /* Handle the residual. There could be up to 7 bytes of
2674 residual to copy! */
2675 if (n_bytes % 16 != 0)
2677 operands[4] = GEN_INT (n_bytes % 8);
2678 if (n_bytes % 16 >= 8)
2679 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2680 if (n_bytes % 8 != 0)
2681 output_asm_insn ("ldd 0(%1),%6", operands);
2682 if (n_bytes % 16 >= 8)
2683 output_asm_insn ("std,ma %3,8(%0)", operands);
2684 if (n_bytes % 8 != 0)
2685 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2690 /* Pre-adjust the loop counter. */
2691 operands[4] = GEN_INT (n_bytes - 8);
2692 output_asm_insn ("ldi %4,%2", operands);
2695 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2696 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2697 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2698 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2699 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2701 /* Handle the residual. There could be up to 7 bytes of
2702 residual to copy! */
2703 if (n_bytes % 8 != 0)
2705 operands[4] = GEN_INT (n_bytes % 4);
2706 if (n_bytes % 8 >= 4)
2707 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2708 if (n_bytes % 4 != 0)
2709 output_asm_insn ("ldw 0(%1),%6", operands);
2710 if (n_bytes % 8 >= 4)
2711 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2712 if (n_bytes % 4 != 0)
2713 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2718 /* Pre-adjust the loop counter. */
2719 operands[4] = GEN_INT (n_bytes - 4);
2720 output_asm_insn ("ldi %4,%2", operands);
2723 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2724 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2725 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2726 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2727 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2729 /* Handle the residual. */
2730 if (n_bytes % 4 != 0)
2732 if (n_bytes % 4 >= 2)
2733 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2734 if (n_bytes % 2 != 0)
2735 output_asm_insn ("ldb 0(%1),%6", operands);
2736 if (n_bytes % 4 >= 2)
2737 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2738 if (n_bytes % 2 != 0)
2739 output_asm_insn ("stb %6,0(%0)", operands);
2744 /* Pre-adjust the loop counter. */
2745 operands[4] = GEN_INT (n_bytes - 2);
2746 output_asm_insn ("ldi %4,%2", operands);
2749 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2750 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2751 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2752 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2753 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2755 /* Handle the residual. */
2756 if (n_bytes % 2 != 0)
2758 output_asm_insn ("ldb 0(%1),%3", operands);
2759 output_asm_insn ("stb %3,0(%0)", operands);
2768 /* Count the number of insns necessary to handle this block move.
2770 Basic structure is the same as output_block_move, except that we
2771 count insns rather than emit them. */
2774 compute_movmem_length (rtx insn)
2776 rtx pat = PATTERN (insn);
2777 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2778 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2779 unsigned int n_insns = 0;
2781 /* We can't move more than a word at a time because the PA
2782 has no integer move insns longer than a word. (Could use fp mem ops?) */
2783 if (align > (TARGET_64BIT ? 8 : 4))
2784 align = (TARGET_64BIT ? 8 : 4);
2786 /* The basic copying loop. */
2790 if (n_bytes % (2 * align) != 0)
2792 if ((n_bytes % (2 * align)) >= align)
2795 if ((n_bytes % align) != 0)
2799 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2803 /* Emit code to perform a block clear.
2805 OPERANDS[0] is the destination pointer as a REG, clobbered.
2806 OPERANDS[1] is a register for temporary storage.
2807 OPERANDS[2] is the size as a CONST_INT
2808 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2811 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2813 int align = INTVAL (operands[3]);
2814 unsigned long n_bytes = INTVAL (operands[2]);
2816 /* We can't clear more than a word at a time because the PA
2817 has no integer move insns longer than a word. */
2818 if (align > (TARGET_64BIT ? 8 : 4))
2819 align = (TARGET_64BIT ? 8 : 4);
2821 /* Note that we know each loop below will execute at least twice
2822 (else we would have open-coded the clear). */
2826 /* Pre-adjust the loop counter. */
2827 operands[2] = GEN_INT (n_bytes - 16);
2828 output_asm_insn ("ldi %2,%1", operands);
2831 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2832 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2833 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2835 /* Handle the residual. There could be up to 7 bytes of
2836 residual to copy! */
2837 if (n_bytes % 16 != 0)
2839 operands[2] = GEN_INT (n_bytes % 8);
2840 if (n_bytes % 16 >= 8)
2841 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2842 if (n_bytes % 8 != 0)
2843 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2848 /* Pre-adjust the loop counter. */
2849 operands[2] = GEN_INT (n_bytes - 8);
2850 output_asm_insn ("ldi %2,%1", operands);
2853 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2854 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2855 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2857 /* Handle the residual. There could be up to 7 bytes of
2858 residual to copy! */
2859 if (n_bytes % 8 != 0)
2861 operands[2] = GEN_INT (n_bytes % 4);
2862 if (n_bytes % 8 >= 4)
2863 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2864 if (n_bytes % 4 != 0)
2865 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2870 /* Pre-adjust the loop counter. */
2871 operands[2] = GEN_INT (n_bytes - 4);
2872 output_asm_insn ("ldi %2,%1", operands);
2875 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2876 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2877 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2879 /* Handle the residual. */
2880 if (n_bytes % 4 != 0)
2882 if (n_bytes % 4 >= 2)
2883 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2884 if (n_bytes % 2 != 0)
2885 output_asm_insn ("stb %%r0,0(%0)", operands);
2890 /* Pre-adjust the loop counter. */
2891 operands[2] = GEN_INT (n_bytes - 2);
2892 output_asm_insn ("ldi %2,%1", operands);
2895 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2896 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2897 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2899 /* Handle the residual. */
2900 if (n_bytes % 2 != 0)
2901 output_asm_insn ("stb %%r0,0(%0)", operands);
2910 /* Count the number of insns necessary to handle this block clear.
2912 Basic structure is the same as output_block_clear, except that we
2913 count insns rather than emit them. */
2916 compute_clrmem_length (rtx insn)
2918 rtx pat = PATTERN (insn);
2919 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2920 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2921 unsigned int n_insns = 0;
2923 /* We can't clear more than a word at a time because the PA
2924 has no integer move insns longer than a word. */
2925 if (align > (TARGET_64BIT ? 8 : 4))
2926 align = (TARGET_64BIT ? 8 : 4);
2928 /* The basic loop. */
2932 if (n_bytes % (2 * align) != 0)
2934 if ((n_bytes % (2 * align)) >= align)
2937 if ((n_bytes % align) != 0)
2941 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2947 output_and (rtx *operands)
2949 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2951 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2952 int ls0, ls1, ms0, p, len;
2954 for (ls0 = 0; ls0 < 32; ls0++)
2955 if ((mask & (1 << ls0)) == 0)
2958 for (ls1 = ls0; ls1 < 32; ls1++)
2959 if ((mask & (1 << ls1)) != 0)
2962 for (ms0 = ls1; ms0 < 32; ms0++)
2963 if ((mask & (1 << ms0)) == 0)
2966 gcc_assert (ms0 == 32);
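/* The assertion guarantees the mask is ones, then one block of zeros,
   then ones again (either outer block possibly empty). E.g.
   0x000000ff takes the "extru" path below, while 0xffffff00 clears its
   low byte with "depi 0,31,8". */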
2974 operands[2] = GEN_INT (len);
2975 return "{extru|extrw,u} %1,31,%2,%0";
2979 /* We could use this `depi' for the case above as well, but `depi'
2980 requires one more register file access than an `extru'. */
2985 operands[2] = GEN_INT (p);
2986 operands[3] = GEN_INT (len);
2987 return "{depi|depwi} 0,%2,%3,%0";
2991 return "and %1,%2,%0";
2994 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2995 storing the result in operands[0]. */
2997 output_64bit_and (rtx *operands)
2999 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3001 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3002 int ls0, ls1, ms0, p, len;
3004 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3005 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3008 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3009 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3012 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3013 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3016 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3018 if (ls1 == HOST_BITS_PER_WIDE_INT)
3024 operands[2] = GEN_INT (len);
3025 return "extrd,u %1,63,%2,%0";
3029 /* We could use this `depdi' for the case above as well, but `depdi'
3030 requires one more register file access than an `extrd,u'. */
3035 operands[2] = GEN_INT (p);
3036 operands[3] = GEN_INT (len);
3037 return "depdi 0,%2,%3,%0";
3041 return "and %1,%2,%0";
3045 output_ior (rtx *operands)
3047 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3048 int bs0, bs1, p, len;
3050 if (INTVAL (operands[2]) == 0)
3051 return "copy %1,%0";
3053 for (bs0 = 0; bs0 < 32; bs0++)
3054 if ((mask & (1 << bs0)) != 0)
3057 for (bs1 = bs0; bs1 < 32; bs1++)
3058 if ((mask & (1 << bs1)) == 0)
3061 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3066 operands[2] = GEN_INT (p);
3067 operands[3] = GEN_INT (len);
3068 return "{depi|depwi} -1,%2,%3,%0";
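/* Illustrative example: operands[2] = 0x00000ff0 gives bs0 = 4 and
   bs1 = 12, hence "depwi -1,27,8,%0", depositing ones into bits
   4..11. */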
3071 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
3072 storing the result in operands[0]. */
3074 output_64bit_ior (rtx *operands)
3076 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3077 int bs0, bs1, p, len;
3079 if (INTVAL (operands[2]) == 0)
3080 return "copy %1,%0";
3082 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3083 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3086 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3087 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3090 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3091 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3096 operands[2] = GEN_INT (p);
3097 operands[3] = GEN_INT (len);
3098 return "depdi -1,%2,%3,%0";
3101 /* Target hook for assembling integer objects. This code handles
3102 aligned SI and DI integers specially since function references
3103 must be preceded by P%. */
3106 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3108 if (size == UNITS_PER_WORD
3110 && function_label_operand (x, VOIDmode))
3112 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3113 output_addr_const (asm_out_file, x);
3114 fputc ('\n', asm_out_file);
3117 return default_assemble_integer (x, size, aligned_p);
3120 /* Output an ascii string. */
3122 output_ascii (FILE *file, const char *p, int size)
3126 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3128 /* The HP assembler can only take strings of 256 characters at one
3129 time. This is a limitation on input line length, *not* the
3130 length of the string. Sigh. Even worse, it seems that the
3131 restriction is in number of input characters (see \xnn &
3132 \whatever). So we have to do this very carefully. */
3134 fputs ("\t.STRING \"", file);
3137 for (i = 0; i < size; i += 4)
3141 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3143 register unsigned int c = (unsigned char) p[i + io];
3145 if (c == '\"' || c == '\\')
3146 partial_output[co++] = '\\';
3147 if (c >= ' ' && c < 0177)
3148 partial_output[co++] = c;
3152 partial_output[co++] = '\\';
3153 partial_output[co++] = 'x';
3154 hexd = c / 16 - 0 + '0';
3156 hexd -= '9' - 'a' + 1;
3157 partial_output[co++] = hexd;
3158 hexd = c % 16 - 0 + '0';
3160 hexd -= '9' - 'a' + 1;
3161 partial_output[co++] = hexd;
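/* Flush early: 243 content characters plus the "\t.STRING" directive
   and quote overhead presumably keeps each output line under the
   256-character input limit noted above. */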
3164 if (chars_output + co > 243)
3166 fputs ("\"\n\t.STRING \"", file);
3169 fwrite (partial_output, 1, (size_t) co, file);
3173 fputs ("\"\n", file);
3176 /* Try to rewrite floating point comparisons & branches to avoid
3177 useless add,tr insns.
3179 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3180 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3181 first attempt to remove useless add,tr insns. It is zero
3182 for the second pass as reorg sometimes leaves bogus REG_DEAD
3185 When CHECK_NOTES is zero we can only eliminate add,tr insns
3186 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3189 remove_useless_addtr_insns (int check_notes)
3192 static int pass = 0;
3194 /* This is fairly cheap, so always run it when optimizing. */
3198 int fbranch_count = 0;
3200 /* Walk all the insns in this function looking for fcmp & fbranch
3201 instructions. Keep track of how many of each we find. */
3202 for (insn = get_insns (); insn; insn = next_insn (insn))
3206 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3207 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3210 tmp = PATTERN (insn);
3212 /* It must be a set. */
3213 if (GET_CODE (tmp) != SET)
3216 /* If the destination is CCFP, then we've found an fcmp insn. */
3217 tmp = SET_DEST (tmp);
3218 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3224 tmp = PATTERN (insn);
3225 /* If this is an fbranch instruction, bump the fbranch counter. */
3226 if (GET_CODE (tmp) == SET
3227 && SET_DEST (tmp) == pc_rtx
3228 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3229 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3230 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3231 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3239 /* Find all floating point compare + branch insns. If possible,
3240 reverse the comparison & the branch to avoid add,tr insns. */
3241 for (insn = get_insns (); insn; insn = next_insn (insn))
3245 /* Ignore anything that isn't an INSN. */
3246 if (GET_CODE (insn) != INSN)
3249 tmp = PATTERN (insn);
3251 /* It must be a set. */
3252 if (GET_CODE (tmp) != SET)
3255 /* The destination must be CCFP, which is register zero. */
3256 tmp = SET_DEST (tmp);
3257 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3260 /* INSN should be a set of CCFP.
3262 See if the result of this insn is used in a reversed FP
3263 conditional branch. If so, reverse our condition and
3264 the branch. Doing so avoids useless add,tr insns. */
3265 next = next_insn (insn);
3268 /* Jumps, calls and labels stop our search. */
3269 if (GET_CODE (next) == JUMP_INSN
3270 || GET_CODE (next) == CALL_INSN
3271 || GET_CODE (next) == CODE_LABEL)
3274 /* As does another fcmp insn. */
3275 if (GET_CODE (next) == INSN
3276 && GET_CODE (PATTERN (next)) == SET
3277 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3278 && REGNO (SET_DEST (PATTERN (next))) == 0)
3281 next = next_insn (next);
3284 /* Is NEXT_INSN a branch? */
3286 && GET_CODE (next) == JUMP_INSN)
3288 rtx pattern = PATTERN (next);
3290 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3291 and CCFP dies, then reverse our conditional and the branch
3292 to avoid the add,tr. */
3293 if (GET_CODE (pattern) == SET
3294 && SET_DEST (pattern) == pc_rtx
3295 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3296 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3297 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3298 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3299 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3300 && (fcmp_count == fbranch_count
3302 && find_regno_note (next, REG_DEAD, 0))))
3304 /* Reverse the branch. */
3305 tmp = XEXP (SET_SRC (pattern), 1);
3306 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3307 XEXP (SET_SRC (pattern), 2) = tmp;
3308 INSN_CODE (next) = -1;
3310 /* Reverse our condition. */
3311 tmp = PATTERN (insn);
3312 PUT_CODE (XEXP (tmp, 1),
3313 (reverse_condition_maybe_unordered
3314 (GET_CODE (XEXP (tmp, 1)))));
3324 /* You may have trouble believing this, but this is the 32 bit HP-PA stack layout:
3329 Variable arguments (optional; any number may be allocated)
3331 SP-(4*(N+9)) arg word N
3336 Fixed arguments (must be allocated; may remain unused)
3345 SP-32 External Data Pointer (DP)
3347 SP-24 External/stub RP (RP')
3351 SP-8 Calling Stub RP (RP'')
3356 SP-0 Stack Pointer (points to next available address)
3360 /* This function saves registers as follows. Registers marked with ' are
3361 this function's registers (as opposed to the previous function's).
3362 If a frame_pointer isn't needed, r4 is saved as a general register;
3363 the space for the frame pointer is still allocated, though, to keep the frame layout consistent.
3369 SP (FP') Previous FP
3370 SP + 4 Alignment filler (sigh)
3371 SP + 8 Space for locals reserved here.
3375 SP + n All call saved registers used.
3379 SP + o All call saved fp registers used.
3383 SP + p (SP') points to next available address.
3387 /* Global variables set by output_function_prologue(). */
3388 /* Size of frame. Need to know this to emit return insns from leaf procedures. */
3390 static HOST_WIDE_INT actual_fsize, local_fsize;
3391 static int save_fregs;
3393 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3394 Handle case where DISP > 8k by using the add_high_const patterns.
3396 Note in DISP > 8k case, we will leave the high part of the address
3397 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3400 store_reg (int reg, HOST_WIDE_INT disp, int base)
3402 rtx insn, dest, src, basereg;
3404 src = gen_rtx_REG (word_mode, reg);
3405 basereg = gen_rtx_REG (Pmode, base);
3406 if (VAL_14_BITS_P (disp))
3408 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3409 insn = emit_move_insn (dest, src);
3411 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3413 rtx delta = GEN_INT (disp);
3414 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3416 emit_move_insn (tmpreg, delta);
3417 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3420 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3421 gen_rtx_SET (VOIDmode, tmpreg,
3422 gen_rtx_PLUS (Pmode, basereg, delta)));
3423 RTX_FRAME_RELATED_P (insn) = 1;
3425 dest = gen_rtx_MEM (word_mode, tmpreg);
3426 insn = emit_move_insn (dest, src);
3430 rtx delta = GEN_INT (disp);
3431 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3432 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3434 emit_move_insn (tmpreg, high);
3435 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3436 insn = emit_move_insn (dest, src);
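/* The PLUS of BASEREG and HIGH above becomes an "addil L'disp,base"
   (whose result always lands in %r1 on the PA), and the LO_SUM store
   then uses the "R'disp(%r1)" low-part address. */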
3438 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3439 gen_rtx_SET (VOIDmode,
3440 gen_rtx_MEM (word_mode,
3441 gen_rtx_PLUS (word_mode,
3448 RTX_FRAME_RELATED_P (insn) = 1;
3451 /* Emit RTL to store REG at the memory location specified by BASE and then
3452 add MOD to BASE. MOD must be <= 8k. */
3455 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3457 rtx insn, basereg, srcreg, delta;
3459 gcc_assert (VAL_14_BITS_P (mod));
3461 basereg = gen_rtx_REG (Pmode, base);
3462 srcreg = gen_rtx_REG (word_mode, reg);
3463 delta = GEN_INT (mod);
3465 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3468 RTX_FRAME_RELATED_P (insn) = 1;
3470 /* RTX_FRAME_RELATED_P must be set on each frame related set
3471 in a parallel with more than one element. */
3472 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3473 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3477 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3478 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3479 whether to add a frame note or not.
3481 In the DISP > 8k case, we leave the high part of the address in %r1.
3482 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3485 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3489 if (VAL_14_BITS_P (disp))
3491 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3492 plus_constant (gen_rtx_REG (Pmode, base), disp));
3494 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3496 rtx basereg = gen_rtx_REG (Pmode, base);
3497 rtx delta = GEN_INT (disp);
3498 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3500 emit_move_insn (tmpreg, delta);
3501 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3502 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3504 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3505 gen_rtx_SET (VOIDmode, tmpreg,
3506 gen_rtx_PLUS (Pmode, basereg, delta)));
3510 rtx basereg = gen_rtx_REG (Pmode, base);
3511 rtx delta = GEN_INT (disp);
3512 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3514 emit_move_insn (tmpreg,
3515 gen_rtx_PLUS (Pmode, basereg,
3516 gen_rtx_HIGH (Pmode, delta)));
3517 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3518 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3521 if (DO_FRAME_NOTES && note)
3522 RTX_FRAME_RELATED_P (insn) = 1;
3526 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3531 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3532 be consistent with the rounding and size calculation done here.
3533 Change them at the same time. */
3535 /* We do our own stack alignment. First, round the size of the
3536 stack locals up to a word boundary. */
3537 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3539 /* Space for previous frame pointer + filler. If any frame is
3540 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3541 waste some space here for the sake of HP compatibility. The
3542 first slot is only used when the frame pointer is needed. */
3543 if (size || frame_pointer_needed)
3544 size += STARTING_FRAME_OFFSET;
3546 /* If the current function calls __builtin_eh_return, then we need
3547 to allocate stack space for registers that will hold data for
3548 the exception handler. */
3549 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3553 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3555 size += i * UNITS_PER_WORD;
3558 /* Account for space used by the callee general register saves. */
3559 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3560 if (df_regs_ever_live_p (i))
3561 size += UNITS_PER_WORD;
3563 /* Account for space used by the callee floating point register saves. */
3564 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3565 if (df_regs_ever_live_p (i)
3566 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3570 /* We always save both halves of the FP register, so always
3571 increment the frame size by 8 bytes. */
3575 /* If any of the floating registers are saved, account for the
3576 alignment needed for the floating point register save block. */
3579 size = (size + 7) & ~7;
3584 /* The various ABIs include space for the outgoing parameters in the
3585 size of the current function's stack frame. We don't need to align
3586 for the outgoing arguments as their alignment is set by the final
3587 rounding for the frame as a whole. */
3588 size += crtl->outgoing_args_size;
3590 /* Allocate space for the fixed frame marker. This space must be
3591 allocated for any function that makes calls or allocates stack space. */
3593 if (!current_function_is_leaf || size)
3594 size += TARGET_64BIT ? 48 : 32;
3596 /* Finally, round to the preferred stack boundary. */
3597 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3598 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
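/* With the usual 64-byte (512-bit) PA stack alignment this rounding is
   simply (size + 63) & ~63. */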
3601 /* Generate the assembly code for function entry. FILE is a stdio
3602 stream to output the code to. SIZE is an int: how many units of
3603 temporary storage to allocate.
3605 Refer to the array `regs_ever_live' to determine which registers to
3606 save; `regs_ever_live[I]' is nonzero if register number I is ever
3607 used in the function. This function is responsible for knowing
3608 which registers should not be saved even if used. */
3610 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3611 of memory. If any fpu reg is used in the function, we allocate
3612 such a block here, at the bottom of the frame, just in case it's needed.
3614 If this function is a leaf procedure, then we may choose not
3615 to do a "save" insn. The decision about whether or not
3616 to do this is made in regclass.c. */
3619 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3621 /* The function's label and associated .PROC must never be
3622 separated and must be output *after* any profiling declarations
3623 to avoid changing spaces/subspaces within a procedure. */
3624 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3625 fputs ("\t.PROC\n", file);
3627 /* hppa_expand_prologue does the dirty work now. We just need
3628 to output the assembler directives which denote the start of a function. */
3630 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3631 if (current_function_is_leaf)
3632 fputs (",NO_CALLS", file);
3634 fputs (",CALLS", file);
3636 fputs (",SAVE_RP", file);
3638 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3639 at the beginning of the frame and that it is used as the frame
3640 pointer for the frame. We do this because our current frame
3641 layout doesn't conform to that specified in the HP runtime
3642 documentation and we need a way to indicate to programs such as
3643 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3644 isn't used by HP compilers but is supported by the assembler.
3645 However, SAVE_SP is supposed to indicate that the previous stack
3646 pointer has been saved in the frame marker. */
3647 if (frame_pointer_needed)
3648 fputs (",SAVE_SP", file);
3650 /* Pass on information about the number of callee register saves
3651 performed in the prologue.
3653 The compiler is supposed to pass the highest register number
3654 saved, the assembler then has to adjust that number before
3655 entering it into the unwind descriptor (to account for any
3656 caller saved registers with lower register numbers than the
3657 first callee saved register). */
3659 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3662 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3664 fputs ("\n\t.ENTRY\n", file);
3666 remove_useless_addtr_insns (0);
3670 hppa_expand_prologue (void)
3672 int merge_sp_adjust_with_store = 0;
3673 HOST_WIDE_INT size = get_frame_size ();
3674 HOST_WIDE_INT offset;
3682 /* Compute total size for frame pointer, filler, locals and rounding to
3683 the next word boundary. Similar code appears in compute_frame_size
3684 and must be changed in tandem with this code. */
3685 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3686 if (local_fsize || frame_pointer_needed)
3687 local_fsize += STARTING_FRAME_OFFSET;
3689 actual_fsize = compute_frame_size (size, &save_fregs);
3691 /* Compute a few things we will use often. */
3692 tmpreg = gen_rtx_REG (word_mode, 1);
3694 /* Save RP first. The calling conventions manual states RP will
3695 always be stored into the caller's frame at sp - 20 or sp - 16
3696 depending on which ABI is in use. */
3697 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3699 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3705 /* Allocate the local frame and set up the frame pointer if needed. */
3706 if (actual_fsize != 0)
3708 if (frame_pointer_needed)
3710 /* Copy the old frame pointer temporarily into %r1. Set up the
3711 new stack pointer, then store away the saved old frame pointer
3712 into the stack at sp and at the same time update the stack
3713 pointer by actual_fsize bytes. Two versions, first
3714 handles small (<8k) frames. The second handles large (>=8k) frames. */
3716 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3718 RTX_FRAME_RELATED_P (insn) = 1;
3720 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3722 RTX_FRAME_RELATED_P (insn) = 1;
3724 if (VAL_14_BITS_P (actual_fsize))
3725 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3728 /* It is incorrect to store the saved frame pointer at *sp,
3729 then increment sp (writes beyond the current stack boundary).
3731 So instead use stwm to store at *sp and post-increment the
3732 stack pointer as an atomic operation. Then increment sp to
3733 finish allocating the new frame. */
3734 HOST_WIDE_INT adjust1 = 8192 - 64;
3735 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
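/* Illustrative numbers: for actual_fsize = 20480, the stwm below saves
   the old frame pointer and advances sp by adjust1 = 8128 atomically,
   then sp moves the remaining adjust2 = 12352. 8192 - 64 is the
   largest multiple of the 64-byte stack alignment that still fits a
   14-bit displacement. */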
3737 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3738 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3742 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3743 we need to store the previous stack pointer (frame pointer)
3744 into the frame marker on targets that use the HP unwind
3745 library. This allows the HP unwind library to be used to
3746 unwind GCC frames. However, we are not fully compatible
3747 with the HP library because our frame layout differs from
3748 that specified in the HP runtime specification.
3750 We don't want a frame note on this instruction as the frame
3751 marker moves during dynamic stack allocation.
3753 This instruction also serves as a blockage to prevent
3754 register spills from being scheduled before the stack
3755 pointer is raised. This is necessary as we store
3756 registers using the frame pointer as a base register,
3757 and the frame pointer is set before sp is raised. */
3758 if (TARGET_HPUX_UNWIND_LIBRARY)
3760 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3761 GEN_INT (TARGET_64BIT ? -8 : -4));
3763 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3767 emit_insn (gen_blockage ());
3769 /* no frame pointer needed. */
3772 /* In some cases we can perform the first callee register save
3773 and allocating the stack frame at the same time. If so, just
3774 make a note of it and defer allocating the frame until saving
3775 the callee registers. */
3776 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3777 merge_sp_adjust_with_store = 1;
3778 /* Cannot optimize. Adjust the stack frame by actual_fsize bytes. */
3781 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3786 /* Normal register save.
3788 Do not save the frame pointer in the frame_pointer_needed case. It
3789 was done earlier. */
3790 if (frame_pointer_needed)
3792 offset = local_fsize;
3794 /* Saving the EH return data registers in the frame is the simplest
3795 way to get the frame unwind information emitted. We put them
3796 just before the general registers. */
3797 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3799 unsigned int i, regno;
3803 regno = EH_RETURN_DATA_REGNO (i);
3804 if (regno == INVALID_REGNUM)
3807 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3808 offset += UNITS_PER_WORD;
3812 for (i = 18; i >= 4; i--)
3813 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3815 store_reg (i, offset, FRAME_POINTER_REGNUM);
3816 offset += UNITS_PER_WORD;
3819 /* Account for %r3 which is saved in a special place. */
3822 /* No frame pointer needed. */
3825 offset = local_fsize - actual_fsize;
3827 /* Saving the EH return data registers in the frame is the simplest
3828 way to get the frame unwind information emitted. */
3829 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3831 unsigned int i, regno;
3835 regno = EH_RETURN_DATA_REGNO (i);
3836 if (regno == INVALID_REGNUM)
3839 /* If merge_sp_adjust_with_store is nonzero, then we can
3840 optimize the first save. */
3841 if (merge_sp_adjust_with_store)
3843 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3844 merge_sp_adjust_with_store = 0;
3847 store_reg (regno, offset, STACK_POINTER_REGNUM);
3848 offset += UNITS_PER_WORD;
3852 for (i = 18; i >= 3; i--)
3853 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3855 /* If merge_sp_adjust_with_store is nonzero, then we can
3856 optimize the first GR save. */
3857 if (merge_sp_adjust_with_store)
3859 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3860 merge_sp_adjust_with_store = 0;
3863 store_reg (i, offset, STACK_POINTER_REGNUM);
3864 offset += UNITS_PER_WORD;
3868 /* If we wanted to merge the SP adjustment with a GR save, but we never
3869 did any GR saves, then just emit the adjustment here. */
3870 if (merge_sp_adjust_with_store)
3871 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3875 /* The hppa calling conventions say that %r19, the pic offset
3876 register, is saved at sp - 32 (in this function's frame)
3877 when generating PIC code. FIXME: What is the correct thing
3878 to do for functions which make no calls and allocate no
3879 frame? Do we need to allocate a frame, or can we just omit
3880 the save? For now we'll just omit the save.
3882 We don't want a note on this insn as the frame marker can
3883 move if there is a dynamic stack allocation. */
3884 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3886 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3888 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3892 /* Align pointer properly (doubleword boundary). */
3893 offset = (offset + 7) & ~7;
3895 /* Floating point register store. */
3900 /* First get the frame or stack pointer to the start of the FP register save area. */
3902 if (frame_pointer_needed)
3904 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3905 base = frame_pointer_rtx;
3909 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3910 base = stack_pointer_rtx;
3913 /* Now actually save the FP registers. */
3914 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3916 if (df_regs_ever_live_p (i)
3917 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3919 rtx addr, insn, reg;
3920 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3921 reg = gen_rtx_REG (DFmode, i);
3922 insn = emit_move_insn (addr, reg);
3925 RTX_FRAME_RELATED_P (insn) = 1;
3928 rtx mem = gen_rtx_MEM (DFmode,
3929 plus_constant (base, offset));
3930 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3931 gen_rtx_SET (VOIDmode, mem, reg));
3935 rtx meml = gen_rtx_MEM (SFmode,
3936 plus_constant (base, offset));
3937 rtx memr = gen_rtx_MEM (SFmode,
3938 plus_constant (base, offset + 4));
3939 rtx regl = gen_rtx_REG (SFmode, i);
3940 rtx regr = gen_rtx_REG (SFmode, i + 1);
3941 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3942 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3945 RTX_FRAME_RELATED_P (setl) = 1;
3946 RTX_FRAME_RELATED_P (setr) = 1;
3947 vec = gen_rtvec (2, setl, setr);
3948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3949 gen_rtx_SEQUENCE (VOIDmode, vec));
3952 offset += GET_MODE_SIZE (DFmode);
3959 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3960 Handle case where DISP > 8k by using the add_high_const patterns. */
3963 load_reg (int reg, HOST_WIDE_INT disp, int base)
3965 rtx dest = gen_rtx_REG (word_mode, reg);
3966 rtx basereg = gen_rtx_REG (Pmode, base);
3969 if (VAL_14_BITS_P (disp))
3970 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3971 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3973 rtx delta = GEN_INT (disp);
3974 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3976 emit_move_insn (tmpreg, delta);
3977 if (TARGET_DISABLE_INDEXING)
3979 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3980 src = gen_rtx_MEM (word_mode, tmpreg);
3983 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3987 rtx delta = GEN_INT (disp);
3988 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3989 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3991 emit_move_insn (tmpreg, high);
3992 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3995 emit_move_insn (dest, src);
3998 /* Update the total code bytes output to the text section. */
4001 update_total_code_bytes (unsigned int nbytes)
4003 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4004 && !IN_NAMED_SECTION_P (cfun->decl))
4006 unsigned int old_total = total_code_bytes;
4008 total_code_bytes += nbytes;
4010 /* Be prepared to handle overflows. */
4011 if (old_total > total_code_bytes)
4012 total_code_bytes = UINT_MAX;
4016 /* This function generates the assembly code for function exit.
4017 Args are as for output_function_prologue ().
4019 The function epilogue should not depend on the current stack
4020 pointer! It should use the frame pointer only. This is mandatory
4021 because of alloca; we also take advantage of it to omit stack
4022 adjustments before returning. */
4025 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4027 rtx insn = get_last_insn ();
4031 /* hppa_expand_epilogue does the dirty work now. We just need
4032 to output the assembler directives which denote the end of a function.
4035 To make debuggers happy, emit a nop if the epilogue was completely
4036 eliminated due to a volatile call as the last insn in the
4037 current function. That way the return address (in %r2) will
4038 always point to a valid instruction in the current function. */
4040 /* Get the last real insn. */
4041 if (GET_CODE (insn) == NOTE)
4042 insn = prev_real_insn (insn);
4044 /* If it is a sequence, then look inside. */
4045 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4046 insn = XVECEXP (PATTERN (insn), 0, 0);
4048 /* If insn is a CALL_INSN, then it must be a call to a volatile
4049 function (otherwise there would be epilogue insns). */
4050 if (insn && GET_CODE (insn) == CALL_INSN)
4052 fputs ("\tnop\n", file);
4056 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4058 if (TARGET_SOM && TARGET_GAS)
4060 /* We're done with this subspace except possibly for some additional
4061 debug information. Forget that we are in this subspace to ensure
4062 that the next function is output in its own subspace. */
4064 cfun->machine->in_nsubspa = 2;
4067 if (INSN_ADDRESSES_SET_P ())
4069 insn = get_last_nonnote_insn ();
4070 last_address += INSN_ADDRESSES (INSN_UID (insn));
4071 if (INSN_P (insn))
4072 last_address += insn_default_length (insn);
4073 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4074 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4077 last_address = UINT_MAX;
4079 /* Finally, update the total number of code bytes output so far. */
4080 update_total_code_bytes (last_address);
4084 hppa_expand_epilogue (void)
4087 HOST_WIDE_INT offset;
4088 HOST_WIDE_INT ret_off = 0;
4090 int merge_sp_adjust_with_load = 0;
4092 /* We will use this often. */
4093 tmpreg = gen_rtx_REG (word_mode, 1);
4095 /* Try to restore RP early to avoid load/use interlocks when
4096 RP gets used in the return (bv) instruction. This appears to still
4097 be necessary even when we schedule the prologue and epilogue. */
4100 ret_off = TARGET_64BIT ? -16 : -20;
4101 if (frame_pointer_needed)
4103 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4108 /* No frame pointer, and stack is smaller than 8k. */
4109 if (VAL_14_BITS_P (ret_off - actual_fsize))
4111 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4117 /* General register restores. */
4118 if (frame_pointer_needed)
4120 offset = local_fsize;
4122 /* If the current function calls __builtin_eh_return, then we need
4123 to restore the saved EH data registers. */
4124 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4126 unsigned int i, regno;
4130 regno = EH_RETURN_DATA_REGNO (i);
4131 if (regno == INVALID_REGNUM)
4134 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4135 offset += UNITS_PER_WORD;
4139 for (i = 18; i >= 4; i--)
4140 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4142 load_reg (i, offset, FRAME_POINTER_REGNUM);
4143 offset += UNITS_PER_WORD;
4148 offset = local_fsize - actual_fsize;
4150 /* If the current function calls __builtin_eh_return, then we need
4151 to restore the saved EH data registers. */
4152 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4154 unsigned int i, regno;
4158 regno = EH_RETURN_DATA_REGNO (i);
4159 if (regno == INVALID_REGNUM)
4162 /* Only for the first load.
4163 merge_sp_adjust_with_load holds the register load
4164 with which we will merge the sp adjustment. */
4165 if (merge_sp_adjust_with_load == 0
4167 && VAL_14_BITS_P (-actual_fsize))
4168 merge_sp_adjust_with_load = regno;
4170 load_reg (regno, offset, STACK_POINTER_REGNUM);
4171 offset += UNITS_PER_WORD;
4175 for (i = 18; i >= 3; i--)
4177 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4179 /* Only for the first load.
4180 merge_sp_adjust_with_load holds the register load
4181 with which we will merge the sp adjustment. */
4182 if (merge_sp_adjust_with_load == 0
4184 && VAL_14_BITS_P (-actual_fsize))
4185 merge_sp_adjust_with_load = i;
4187 load_reg (i, offset, STACK_POINTER_REGNUM);
4188 offset += UNITS_PER_WORD;
4193 /* Align pointer properly (doubleword boundary). */
4194 offset = (offset + 7) & ~7;
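/* A worked example of the rounding above (illustrative values only): an
   offset of 13 becomes (13 + 7) & ~7 == 16, while an already-aligned
   offset of 16 becomes (16 + 7) & ~7 == 16 and is left unchanged.  */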
4196 /* FP register restores. */
4199 /* Adjust the register to index off of. */
4200 if (frame_pointer_needed)
4201 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4203 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4205 /* Actually do the restores now. */
4206 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4207 if (df_regs_ever_live_p (i)
4208 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4210 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4211 rtx dest = gen_rtx_REG (DFmode, i);
4212 emit_move_insn (dest, src);
4216 /* Emit a blockage insn here to keep these insns from being moved to
4217 an earlier spot in the epilogue, or into the main instruction stream.
4219 This is necessary as we must not cut the stack back before all the
4220 restores are finished. */
4221 emit_insn (gen_blockage ());
4223 /* Reset stack pointer (and possibly frame pointer). The stack
4224 pointer is initially set to fp + 64 to avoid a race condition. */
4225 if (frame_pointer_needed)
4227 rtx delta = GEN_INT (-64);
4229 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4230 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4232 /* If we were deferring a callee register restore, do it now. */
4233 else if (merge_sp_adjust_with_load)
4235 rtx delta = GEN_INT (-actual_fsize);
4236 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4238 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4240 else if (actual_fsize != 0)
4241 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4244 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4245 frame greater than 8k), do so now. */
4247 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4249 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4251 rtx sa = EH_RETURN_STACKADJ_RTX;
4253 emit_insn (gen_blockage ());
4254 emit_insn (TARGET_64BIT
4255 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4256 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4261 hppa_pic_save_rtx (void)
4263 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4266 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4267 #define NO_DEFERRED_PROFILE_COUNTERS 0
4271 /* Vector of funcdef numbers. */
4272 static VEC(int,heap) *funcdef_nos;
4274 /* Output deferred profile counters. */
4276 output_deferred_profile_counters (void)
4281 if (VEC_empty (int, funcdef_nos))
4284 switch_to_section (data_section);
4285 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4286 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4288 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4290 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4291 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4294 VEC_free (int, heap, funcdef_nos);
4298 hppa_profile_hook (int label_no)
4300 /* We use SImode for the address of the function in both 32 and
4301 64-bit code to avoid having to provide DImode versions of the
4302 lcla2 and load_offset_label_address insn patterns. */
4303 rtx reg = gen_reg_rtx (SImode);
4304 rtx label_rtx = gen_label_rtx ();
4305 rtx begin_label_rtx, call_insn;
4306 char begin_label_name[16];
4308 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4310 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4313 emit_move_insn (arg_pointer_rtx,
4314 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4317 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4319 /* The address of the function is loaded into %r25 with an instruction-
4320 relative sequence that avoids the use of relocations. The sequence
4321 is split so that the load_offset_label_address instruction can
4322 occupy the delay slot of the call to _mcount. */
4324 emit_insn (gen_lcla2 (reg, label_rtx));
4326 emit_insn (gen_lcla1 (reg, label_rtx));
4328 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4329 reg, begin_label_rtx, label_rtx));
4331 #if !NO_DEFERRED_PROFILE_COUNTERS
4333 rtx count_label_rtx, addr, r24;
4334 char count_label_name[16];
4336 VEC_safe_push (int, heap, funcdef_nos, label_no);
4337 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4338 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4340 addr = force_reg (Pmode, count_label_rtx);
4341 r24 = gen_rtx_REG (Pmode, 24);
4342 emit_move_insn (r24, addr);
4345 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4346 gen_rtx_SYMBOL_REF (Pmode,
4348 GEN_INT (TARGET_64BIT ? 24 : 12)));
4350 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4355 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4356 gen_rtx_SYMBOL_REF (Pmode,
4358 GEN_INT (TARGET_64BIT ? 16 : 8)));
4362 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4363 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
/* Indicate the _mcount call cannot throw, nor will it execute a
   non-local goto.  */
4367 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4370 /* Fetch the return address for the frame COUNT steps up from
4371 the current frame, after the prologue. FRAMEADDR is the
4372 frame pointer of the COUNT frame.
4374 We want to ignore any export stub remnants here. To handle this,
4375 we examine the code at the return address, and if it is an export
stub, we return a memory rtx for the stub return address stored
at frame-24.
4379 The value returned is used in two different ways:
4381 1. To find a function's caller.
4383 2. To change the return address for a function.
4385 This function handles most instances of case 1; however, it will
4386 fail if there are two levels of stubs to execute on the return
4387 path. The only way I believe that can happen is if the return value
4388 needs a parameter relocation, which never happens for C code.
4390 This function handles most instances of case 2; however, it will
4391 fail if we did not originally have stub code on the return path
4392 but will need stub code on the new return path. This can happen if
4393 the caller & callee are both in the main program, but the new
4394 return location is in a shared library. */
4397 return_addr_rtx (int count, rtx frameaddr)
4404 /* Instruction stream at the normal return address for the export stub:
4406 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4407 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4408 0x00011820 | stub+16: mtsp r1,sr0
4409 0xe0400002 | stub+20: be,n 0(sr0,rp)
4411 0xe0400002 must be specified as -532676606 so that it won't be
4412 rejected as an invalid immediate operand on 64-bit hosts. */
4414 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
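/* A sanity check on the last constant above (a worked example, not
   compiled): 0xe0400002 == 3762290690, which exceeds INT32_MAX, so as
   a signed 32-bit value it wraps to 3762290690 - 4294967296
   == -532676606, the value written in the initializer.  */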
4420 rp = get_hard_reg_initial_val (Pmode, 2);
4422 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4425 /* If there is no export stub then just use the value saved from
4426 the return pointer register. */
4428 saved_rp = gen_reg_rtx (Pmode);
4429 emit_move_insn (saved_rp, rp);
4431 /* Get pointer to the instruction stream. We have to mask out the
4432 privilege level from the two low order bits of the return address
4433 pointer here so that ins will point to the start of the first
4434 instruction that would have been executed if we returned. */
4435 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4436 label = gen_label_rtx ();
/* Check the instruction stream at the normal return address for the
   export stub.  If it is an export stub, then our return address is
   really in -24[frameaddr].  */
4442 for (i = 0; i < 3; i++)
4444 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4445 rtx op1 = GEN_INT (insns[i]);
4446 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4449 /* Here we know that our return address points to an export
4450 stub. We don't want to return the address of the export stub,
4451 but rather the return address of the export stub. That return
4452 address is stored at -24[frameaddr]. */
4454 emit_move_insn (saved_rp,
4456 memory_address (Pmode,
4457 plus_constant (frameaddr,
4466 emit_bcond_fp (rtx operands[])
4468 enum rtx_code code = GET_CODE (operands[0]);
4469 rtx operand0 = operands[1];
4470 rtx operand1 = operands[2];
4471 rtx label = operands[3];
4473 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4474 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4476 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4477 gen_rtx_IF_THEN_ELSE (VOIDmode,
4480 gen_rtx_REG (CCFPmode, 0),
4482 gen_rtx_LABEL_REF (VOIDmode, label),
4487 /* Adjust the cost of a scheduling dependency. Return the new cost of
4488 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
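/* An illustrative anti-dependence scenario for the code below; the
   registers and instructions are hypothetical:

       fmpy,dbl %fr5,%fr6,%fr7    ; dep_insn, arithmetic op reading %fr5
       fldd 0(%r20),%fr5          ; insn, fpload writing %fr5

   The fpload's target is a source of the multiply, so the REG_DEP_ANTI
   case below charges insn_default_latency (dep_insn) - 1 for this
   dependency instead of the usual anti-dependence cost of 0.  */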
4491 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4493 enum attr_type attr_type;
4495 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4496 true dependencies as they are described with bypasses now. */
4497 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4500 if (! recog_memoized (insn))
4503 attr_type = get_attr_type (insn);
4505 switch (REG_NOTE_KIND (link))
/* Anti dependency; DEP_INSN reads a register that INSN writes some
   cycles later.  */
4511 if (attr_type == TYPE_FPLOAD)
4513 rtx pat = PATTERN (insn);
4514 rtx dep_pat = PATTERN (dep_insn);
4515 if (GET_CODE (pat) == PARALLEL)
4517 /* This happens for the fldXs,mb patterns. */
4518 pat = XVECEXP (pat, 0, 0);
4520 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4521 /* If this happens, we have to extend this to schedule
4522 optimally. Return 0 for now. */
4525 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4527 if (! recog_memoized (dep_insn))
4529 switch (get_attr_type (dep_insn))
4536 case TYPE_FPSQRTSGL:
4537 case TYPE_FPSQRTDBL:
4538 /* A fpload can't be issued until one cycle before a
4539 preceding arithmetic operation has finished if
4540 the target of the fpload is any of the sources
4541 (or destination) of the arithmetic operation. */
4542 return insn_default_latency (dep_insn) - 1;
4549 else if (attr_type == TYPE_FPALU)
4551 rtx pat = PATTERN (insn);
4552 rtx dep_pat = PATTERN (dep_insn);
4553 if (GET_CODE (pat) == PARALLEL)
4555 /* This happens for the fldXs,mb patterns. */
4556 pat = XVECEXP (pat, 0, 0);
4558 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4559 /* If this happens, we have to extend this to schedule
4560 optimally. Return 0 for now. */
4563 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4565 if (! recog_memoized (dep_insn))
4567 switch (get_attr_type (dep_insn))
4571 case TYPE_FPSQRTSGL:
4572 case TYPE_FPSQRTDBL:
4573 /* An ALU flop can't be issued until two cycles before a
4574 preceding divide or sqrt operation has finished if
4575 the target of the ALU flop is any of the sources
4576 (or destination) of the divide or sqrt operation. */
4577 return insn_default_latency (dep_insn) - 2;
4585 /* For other anti dependencies, the cost is 0. */
4588 case REG_DEP_OUTPUT:
/* Output dependency; DEP_INSN writes a register that INSN writes some
   cycles later.  */
4591 if (attr_type == TYPE_FPLOAD)
4593 rtx pat = PATTERN (insn);
4594 rtx dep_pat = PATTERN (dep_insn);
4595 if (GET_CODE (pat) == PARALLEL)
4597 /* This happens for the fldXs,mb patterns. */
4598 pat = XVECEXP (pat, 0, 0);
4600 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4601 /* If this happens, we have to extend this to schedule
4602 optimally. Return 0 for now. */
4605 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4607 if (! recog_memoized (dep_insn))
4609 switch (get_attr_type (dep_insn))
4616 case TYPE_FPSQRTSGL:
4617 case TYPE_FPSQRTDBL:
4618 /* A fpload can't be issued until one cycle before a
4619 preceding arithmetic operation has finished if
4620 the target of the fpload is the destination of the
4621 arithmetic operation.
4623 Exception: For PA7100LC, PA7200 and PA7300, the cost
4624 is 3 cycles, unless they bundle together. We also
4625 pay the penalty if the second insn is a fpload. */
4626 return insn_default_latency (dep_insn) - 1;
4633 else if (attr_type == TYPE_FPALU)
4635 rtx pat = PATTERN (insn);
4636 rtx dep_pat = PATTERN (dep_insn);
4637 if (GET_CODE (pat) == PARALLEL)
4639 /* This happens for the fldXs,mb patterns. */
4640 pat = XVECEXP (pat, 0, 0);
4642 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4643 /* If this happens, we have to extend this to schedule
4644 optimally. Return 0 for now. */
4647 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4649 if (! recog_memoized (dep_insn))
4651 switch (get_attr_type (dep_insn))
4655 case TYPE_FPSQRTSGL:
4656 case TYPE_FPSQRTDBL:
4657 /* An ALU flop can't be issued until two cycles before a
4658 preceding divide or sqrt operation has finished if
4659 the target of the ALU flop is also the target of
4660 the divide or sqrt operation. */
4661 return insn_default_latency (dep_insn) - 2;
4669 /* For other output dependencies, the cost is 0. */
/* Adjust scheduling priorities.  We use this to try to keep addil
   and the next use of %r1 close together.  */
4680 pa_adjust_priority (rtx insn, int priority)
4682 rtx set = single_set (insn);
4686 src = SET_SRC (set);
4687 dest = SET_DEST (set);
4688 if (GET_CODE (src) == LO_SUM
4689 && symbolic_operand (XEXP (src, 1), VOIDmode)
4690 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4693 else if (GET_CODE (src) == MEM
4694 && GET_CODE (XEXP (src, 0)) == LO_SUM
4695 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4696 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4699 else if (GET_CODE (dest) == MEM
4700 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4701 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4702 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4708 /* The 700 can only issue a single insn at a time.
4709 The 7XXX processors can issue two insns at a time.
4710 The 8000 can issue 4 insns at a time. */
4712 pa_issue_rate (void)
4716 case PROCESSOR_700: return 1;
4717 case PROCESSOR_7100: return 2;
4718 case PROCESSOR_7100LC: return 2;
4719 case PROCESSOR_7200: return 2;
4720 case PROCESSOR_7300: return 2;
4721 case PROCESSOR_8000: return 4;
4730 /* Return any length adjustment needed by INSN which already has its length
4731 computed as LENGTH. Return zero if no adjustment is necessary.
4733 For the PA: function calls, millicode calls, and backwards short
4734 conditional branches with unfilled delay slots need an adjustment by +1
4735 (to account for the NOP which will be inserted into the instruction stream).
4737 Also compute the length of an inline block move here as it is too
4738 complicated to express as a length attribute in pa.md. */
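/* For example (assuming the usual 4-byte PA instruction size), a
   millicode call whose delay slot remains unfilled is reported as 4
   bytes longer than its computed length so that the NOP inserted into
   the delay slot is accounted for.  */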
4740 pa_adjust_insn_length (rtx insn, int length)
4742 rtx pat = PATTERN (insn);
/* Jumps inside switch tables which have unfilled delay slots need
   adjustment.  */
4746 if (GET_CODE (insn) == JUMP_INSN
4747 && GET_CODE (pat) == PARALLEL
4748 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4750 /* Millicode insn with an unfilled delay slot. */
4751 else if (GET_CODE (insn) == INSN
4752 && GET_CODE (pat) != SEQUENCE
4753 && GET_CODE (pat) != USE
4754 && GET_CODE (pat) != CLOBBER
4755 && get_attr_type (insn) == TYPE_MILLI)
4757 /* Block move pattern. */
4758 else if (GET_CODE (insn) == INSN
4759 && GET_CODE (pat) == PARALLEL
4760 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4761 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4762 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4763 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4764 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4765 return compute_movmem_length (insn) - 4;
4766 /* Block clear pattern. */
4767 else if (GET_CODE (insn) == INSN
4768 && GET_CODE (pat) == PARALLEL
4769 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4770 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4771 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4772 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4773 return compute_clrmem_length (insn) - 4;
4774 /* Conditional branch with an unfilled delay slot. */
4775 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4777 /* Adjust a short backwards conditional with an unfilled delay slot. */
4778 if (GET_CODE (pat) == SET
4780 && JUMP_LABEL (insn) != NULL_RTX
4781 && ! forward_branch_p (insn))
4783 else if (GET_CODE (pat) == PARALLEL
4784 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
/* Adjust dbra insn with short backwards conditional branch with
   unfilled delay slot -- only for the case where the counter is in a
   general register.  */
4790 else if (GET_CODE (pat) == PARALLEL
4791 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4792 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4793 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4795 && ! forward_branch_p (insn))
4803 /* Print operand X (an rtx) in assembler syntax to file FILE.
4804 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4805 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4808 print_operand (FILE *file, rtx x, int code)
4813 /* Output a 'nop' if there's nothing for the delay slot. */
4814 if (dbr_sequence_length () == 0)
4815 fputs ("\n\tnop", file);
/* Output a nullification completer if there's nothing for the
   delay slot or nullification is requested.  */
if (dbr_sequence_length () == 0 ||
    (final_sequence &&
     INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4826 /* Print out the second register name of a register pair.
4827 I.e., R (6) => 7. */
4828 fputs (reg_names[REGNO (x) + 1], file);
4831 /* A register or zero. */
4833 || (x == CONST0_RTX (DFmode))
4834 || (x == CONST0_RTX (SFmode)))
4836 fputs ("%r0", file);
4842 /* A register or zero (floating point). */
4844 || (x == CONST0_RTX (DFmode))
4845 || (x == CONST0_RTX (SFmode)))
4847 fputs ("%fr0", file);
4856 xoperands[0] = XEXP (XEXP (x, 0), 0);
4857 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4858 output_global_address (file, xoperands[1], 0);
4859 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4863 case 'C': /* Plain (C)ondition */
4865 switch (GET_CODE (x))
4868 fputs ("=", file); break;
4870 fputs ("<>", file); break;
4872 fputs (">", file); break;
4874 fputs (">=", file); break;
4876 fputs (">>=", file); break;
4878 fputs (">>", file); break;
4880 fputs ("<", file); break;
4882 fputs ("<=", file); break;
4884 fputs ("<<=", file); break;
4886 fputs ("<<", file); break;
4891 case 'N': /* Condition, (N)egated */
4892 switch (GET_CODE (x))
4895 fputs ("<>", file); break;
4897 fputs ("=", file); break;
4899 fputs ("<=", file); break;
4901 fputs ("<", file); break;
4903 fputs ("<<", file); break;
4905 fputs ("<<=", file); break;
4907 fputs (">=", file); break;
4909 fputs (">", file); break;
4911 fputs (">>", file); break;
4913 fputs (">>=", file); break;
4918 /* For floating point comparisons. Note that the output
4919 predicates are the complement of the desired mode. The
4920 conditions for GT, GE, LT, LE and LTGT cause an invalid
4921 operation exception if the result is unordered and this
4922 exception is enabled in the floating-point status register. */
4924 switch (GET_CODE (x))
4927 fputs ("!=", file); break;
4929 fputs ("=", file); break;
4931 fputs ("!>", file); break;
4933 fputs ("!>=", file); break;
4935 fputs ("!<", file); break;
4937 fputs ("!<=", file); break;
4939 fputs ("!<>", file); break;
4941 fputs ("!?<=", file); break;
4943 fputs ("!?<", file); break;
4945 fputs ("!?>=", file); break;
4947 fputs ("!?>", file); break;
4949 fputs ("!?=", file); break;
4951 fputs ("!?", file); break;
4953 fputs ("?", file); break;
4958 case 'S': /* Condition, operands are (S)wapped. */
4959 switch (GET_CODE (x))
4962 fputs ("=", file); break;
4964 fputs ("<>", file); break;
4966 fputs ("<", file); break;
4968 fputs ("<=", file); break;
4970 fputs ("<<=", file); break;
4972 fputs ("<<", file); break;
4974 fputs (">", file); break;
4976 fputs (">=", file); break;
4978 fputs (">>=", file); break;
4980 fputs (">>", file); break;
4985 case 'B': /* Condition, (B)oth swapped and negate. */
4986 switch (GET_CODE (x))
4989 fputs ("<>", file); break;
4991 fputs ("=", file); break;
4993 fputs (">=", file); break;
4995 fputs (">", file); break;
4997 fputs (">>", file); break;
4999 fputs (">>=", file); break;
5001 fputs ("<=", file); break;
5003 fputs ("<", file); break;
5005 fputs ("<<", file); break;
5007 fputs ("<<=", file); break;
5013 gcc_assert (GET_CODE (x) == CONST_INT);
5014 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5017 gcc_assert (GET_CODE (x) == CONST_INT);
5018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5021 gcc_assert (GET_CODE (x) == CONST_INT);
5022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5025 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5026 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5029 gcc_assert (GET_CODE (x) == CONST_INT);
5030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5033 gcc_assert (GET_CODE (x) == CONST_INT);
5034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5037 if (GET_CODE (x) == CONST_INT)
5042 switch (GET_CODE (XEXP (x, 0)))
5046 if (ASSEMBLER_DIALECT == 0)
5047 fputs ("s,mb", file);
5049 fputs (",mb", file);
5053 if (ASSEMBLER_DIALECT == 0)
5054 fputs ("s,ma", file);
5056 fputs (",ma", file);
5059 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5060 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5062 if (ASSEMBLER_DIALECT == 0)
5065 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5066 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5068 if (ASSEMBLER_DIALECT == 0)
5069 fputs ("x,s", file);
5073 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5077 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5083 output_global_address (file, x, 0);
5086 output_global_address (file, x, 1);
5088 case 0: /* Don't do anything special */
5093 compute_zdepwi_operands (INTVAL (x), op);
5094 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5100 compute_zdepdi_operands (INTVAL (x), op);
5101 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5105 /* We can get here from a .vtable_inherit due to our
CONSTANT_ADDRESS_P rejecting perfectly good constant
addresses.  */
5112 if (GET_CODE (x) == REG)
5114 fputs (reg_names [REGNO (x)], file);
5115 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5121 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5122 && (REGNO (x) & 1) == 0)
5125 else if (GET_CODE (x) == MEM)
5127 int size = GET_MODE_SIZE (GET_MODE (x));
5128 rtx base = NULL_RTX;
5129 switch (GET_CODE (XEXP (x, 0)))
5133 base = XEXP (XEXP (x, 0), 0);
5134 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5138 base = XEXP (XEXP (x, 0), 0);
5139 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5142 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5143 fprintf (file, "%s(%s)",
5144 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5145 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5146 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5147 fprintf (file, "%s(%s)",
5148 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5149 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5150 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5151 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5153 /* Because the REG_POINTER flag can get lost during reload,
5154 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5155 index and base registers in the combined move patterns. */
5156 rtx base = XEXP (XEXP (x, 0), 1);
5157 rtx index = XEXP (XEXP (x, 0), 0);
5159 fprintf (file, "%s(%s)",
5160 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5163 output_address (XEXP (x, 0));
5166 output_address (XEXP (x, 0));
5171 output_addr_const (file, x);
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5177 output_global_address (FILE *file, rtx x, int round_constant)
5180 /* Imagine (high (const (plus ...))). */
5181 if (GET_CODE (x) == HIGH)
5184 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5185 output_addr_const (file, x);
5186 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5188 output_addr_const (file, x);
5189 fputs ("-$global$", file);
5191 else if (GET_CODE (x) == CONST)
5193 const char *sep = "";
5194 int offset = 0; /* assembler wants -$global$ at end */
5195 rtx base = NULL_RTX;
5197 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5200 base = XEXP (XEXP (x, 0), 0);
5201 output_addr_const (file, base);
5204 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5210 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5213 base = XEXP (XEXP (x, 0), 1);
5214 output_addr_const (file, base);
5217 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5223 /* How bogus. The compiler is apparently responsible for
5224 rounding the constant if it uses an LR field selector.
5226 The linker and/or assembler seem a better place since
5227 they have to do this kind of thing already.
5229 If we fail to do this, HP's optimizing linker may eliminate
5230 an addil, but not update the ldw/stw/ldo instruction that
5231 uses the result of the addil. */
5233 offset = ((offset + 0x1000) & ~0x1fff);
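/* A worked example of the rounding above (offset value hypothetical):
   an offset of 0x17ff becomes (0x17ff + 0x1000) & ~0x1fff == 0x2000,
   i.e. offsets are rounded to the nearest multiple of 0x2000 (8k).  */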
5235 switch (GET_CODE (XEXP (x, 0)))
5248 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5256 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5257 fputs ("-$global$", file);
5259 fprintf (file, "%s%d", sep, offset);
5262 output_addr_const (file, x);
5265 /* Output boilerplate text to appear at the beginning of the file.
5266 There are several possible versions. */
5267 #define aputs(x) fputs(x, asm_out_file)
5269 pa_file_start_level (void)
5272 aputs ("\t.LEVEL 2.0w\n");
5273 else if (TARGET_PA_20)
5274 aputs ("\t.LEVEL 2.0\n");
5275 else if (TARGET_PA_11)
5276 aputs ("\t.LEVEL 1.1\n");
5278 aputs ("\t.LEVEL 1.0\n");
5282 pa_file_start_space (int sortspace)
5284 aputs ("\t.SPACE $PRIVATE$");
5287 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5288 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5289 "\n\t.SPACE $TEXT$");
5292 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5293 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5297 pa_file_start_file (int want_version)
5299 if (write_symbols != NO_DEBUG)
5301 output_file_directive (asm_out_file, main_input_filename);
5303 aputs ("\t.version\t\"01.01\"\n");
5308 pa_file_start_mcount (const char *aswhat)
5311 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5315 pa_elf_file_start (void)
5317 pa_file_start_level ();
5318 pa_file_start_mcount ("ENTRY");
5319 pa_file_start_file (0);
5323 pa_som_file_start (void)
5325 pa_file_start_level ();
5326 pa_file_start_space (0);
5327 aputs ("\t.IMPORT $global$,DATA\n"
5328 "\t.IMPORT $$dyncall,MILLICODE\n");
5329 pa_file_start_mcount ("CODE");
5330 pa_file_start_file (0);
5334 pa_linux_file_start (void)
5336 pa_file_start_file (1);
5337 pa_file_start_level ();
5338 pa_file_start_mcount ("CODE");
5342 pa_hpux64_gas_file_start (void)
5344 pa_file_start_level ();
5345 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5347 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5349 pa_file_start_file (1);
5353 pa_hpux64_hpas_file_start (void)
5355 pa_file_start_level ();
5356 pa_file_start_space (1);
5357 pa_file_start_mcount ("CODE");
5358 pa_file_start_file (0);
5362 /* Search the deferred plabel list for SYMBOL and return its internal
5363 label. If an entry for SYMBOL is not found, a new entry is created. */
5366 get_deferred_plabel (rtx symbol)
5368 const char *fname = XSTR (symbol, 0);
/* See if we have already put this function on the list of deferred
   plabels.  This list is generally small, so a linear search is not
   too ugly.  If it proves too slow, replace it with something faster.  */
5374 for (i = 0; i < n_deferred_plabels; i++)
5375 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5378 /* If the deferred plabel list is empty, or this entry was not found
5379 on the list, create a new entry on the list. */
5380 if (deferred_plabels == NULL || i == n_deferred_plabels)
5384 if (deferred_plabels == 0)
5385 deferred_plabels = ggc_alloc_deferred_plabel ();
5387 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5389 n_deferred_plabels + 1);
5391 i = n_deferred_plabels++;
5392 deferred_plabels[i].internal_label = gen_label_rtx ();
5393 deferred_plabels[i].symbol = symbol;
5395 /* Gross. We have just implicitly taken the address of this
5396 function. Mark it in the same manner as assemble_name. */
5397 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5399 mark_referenced (id);
5402 return deferred_plabels[i].internal_label;
5406 output_deferred_plabels (void)
/* If we have some deferred plabels, then we need to switch into the
   data or readonly data section, and align it to a 4 byte (32-bit) or
   8 byte (64-bit) boundary before outputting the deferred plabels.  */
5413 if (n_deferred_plabels)
5415 switch_to_section (flag_pic ? data_section : readonly_data_section);
5416 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5419 /* Now output the deferred plabels. */
5420 for (i = 0; i < n_deferred_plabels; i++)
5422 targetm.asm_out.internal_label (asm_out_file, "L",
5423 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5424 assemble_integer (deferred_plabels[i].symbol,
5425 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5429 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5430 /* Initialize optabs to point to HPUX long double emulation routines. */
5432 pa_hpux_init_libfuncs (void)
5434 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5435 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5436 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5437 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5438 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5439 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5440 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5441 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5442 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5444 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5445 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5446 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5447 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5448 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5449 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5450 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5452 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5453 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5454 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5455 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5457 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5458 ? "__U_Qfcnvfxt_quad_to_sgl"
5459 : "_U_Qfcnvfxt_quad_to_sgl");
5460 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5461 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5462 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5464 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5465 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5466 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5467 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5471 /* HP's millicode routines mean something special to the assembler.
5472 Keep track of which ones we have used. */
5474 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5475 static void import_milli (enum millicodes);
5476 static char imported[(int) end1000];
5477 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5478 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5479 #define MILLI_START 10
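/* A sketch of how import_milli below edits the template in place; the
   "$$" prefix ends at index MILLI_START (10) and every millicode name
   is exactly four characters:

     strcpy  (str, ".IMPORT $$....,MILLICODE");
     strncpy (str + MILLI_START, "mulI", 4);

   leaves str == ".IMPORT $$mulI,MILLICODE", with the "," separator
   intact.  */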
5482 import_milli (enum millicodes code)
5484 char str[sizeof (import_string)];
5486 if (!imported[(int) code])
5488 imported[(int) code] = 1;
5489 strcpy (str, import_string);
5490 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5491 output_asm_insn (str, 0);
5495 /* The register constraints have put the operands and return value in
5496 the proper registers. */
5499 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5501 import_milli (mulI);
5502 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5505 /* Emit the rtl for doing a division by a constant. */
5507 /* Do magic division millicodes exist for this value? */
5508 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
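/* For example, magic_milli[10] is nonzero, so a division by 10 can be
   emitted as a call to the $$divI_10 (or $$divU_10) millicode entry,
   while magic_milli[11] is zero and a division by 11 falls back to the
   generic $$divI/$$divU routines.  */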
5510 /* We'll use an array to keep track of the magic millicodes and
whether or not we've used them already.  [n][0] is signed, [n][1] is
unsigned.  */
5514 static int div_milli[16][2];
5517 emit_hpdiv_const (rtx *operands, int unsignedp)
5519 if (GET_CODE (operands[2]) == CONST_INT
5520 && INTVAL (operands[2]) > 0
5521 && INTVAL (operands[2]) < 16
5522 && magic_milli[INTVAL (operands[2])])
5524 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5526 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5530 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5531 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5533 gen_rtx_REG (SImode, 26),
5535 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5536 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5538 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5539 gen_rtx_CLOBBER (VOIDmode, ret))));
5540 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5547 output_div_insn (rtx *operands, int unsignedp, rtx insn)
/* If the divisor is a constant, try to use one of the special
   millicode routines.  */
5553 if (GET_CODE (operands[0]) == CONST_INT)
5555 static char buf[100];
5556 divisor = INTVAL (operands[0]);
5557 if (!div_milli[divisor][unsignedp])
5559 div_milli[divisor][unsignedp] = 1;
5561 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5563 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5567 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5568 INTVAL (operands[0]));
5569 return output_millicode_call (insn,
5570 gen_rtx_SYMBOL_REF (SImode, buf));
5574 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5575 INTVAL (operands[0]));
5576 return output_millicode_call (insn,
5577 gen_rtx_SYMBOL_REF (SImode, buf));
5580 /* Divisor isn't a special constant. */
5585 import_milli (divU);
5586 return output_millicode_call (insn,
5587 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5591 import_milli (divI);
5592 return output_millicode_call (insn,
5593 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5598 /* Output a $$rem millicode to do mod. */
5601 output_mod_insn (int unsignedp, rtx insn)
5605 import_milli (remU);
5606 return output_millicode_call (insn,
5607 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5611 import_milli (remI);
5612 return output_millicode_call (insn,
5613 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5618 output_arg_descriptor (rtx call_insn)
5620 const char *arg_regs[4];
5621 enum machine_mode arg_mode;
5623 int i, output_flag = 0;
5626 /* We neither need nor want argument location descriptors for the
5627 64bit runtime environment or the ELF32 environment. */
5628 if (TARGET_64BIT || TARGET_ELF32)
5631 for (i = 0; i < 4; i++)
5634 /* Specify explicitly that no argument relocations should take place
5635 if using the portable runtime calling conventions. */
5636 if (TARGET_PORTABLE_RUNTIME)
5638 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5643 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5644 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5645 link; link = XEXP (link, 1))
5647 rtx use = XEXP (link, 0);
5649 if (! (GET_CODE (use) == USE
5650 && GET_CODE (XEXP (use, 0)) == REG
5651 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5654 arg_mode = GET_MODE (XEXP (use, 0));
5655 regno = REGNO (XEXP (use, 0));
5656 if (regno >= 23 && regno <= 26)
5658 arg_regs[26 - regno] = "GR";
5659 if (arg_mode == DImode)
5660 arg_regs[25 - regno] = "GR";
5662 else if (regno >= 32 && regno <= 39)
5664 if (arg_mode == SFmode)
5665 arg_regs[(regno - 32) / 2] = "FR";
5668 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5669 arg_regs[(regno - 34) / 2] = "FR";
5670 arg_regs[(regno - 34) / 2 + 1] = "FU";
5672 arg_regs[(regno - 34) / 2] = "FU";
5673 arg_regs[(regno - 34) / 2 + 1] = "FR";
5678 fputs ("\t.CALL ", asm_out_file);
5679 for (i = 0; i < 4; i++)
5684 fputc (',', asm_out_file);
5685 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5688 fputc ('\n', asm_out_file);
5692 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5693 enum machine_mode mode, secondary_reload_info *sri)
5695 int is_symbolic, regno;
5696 enum reg_class rclass = (enum reg_class) rclass_i;
5698 /* Handle the easy stuff first. */
5699 if (rclass == R1_REGS)
5705 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
/* If we have something like (mem (mem (...))), we can safely assume the
5712 inner MEM will end up in a general register after reloading, so there's
5713 no need for a secondary reload. */
5714 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5717 /* Trying to load a constant into a FP register during PIC code
5718 generation requires %r1 as a scratch register. */
5720 && (mode == SImode || mode == DImode)
5721 && FP_REG_CLASS_P (rclass)
5722 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5724 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5725 : CODE_FOR_reload_indi_r1);
5729 /* Profiling showed the PA port spends about 1.3% of its compilation
5730 time in true_regnum from calls inside pa_secondary_reload_class. */
5731 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5732 regno = true_regnum (x);
5734 /* In order to allow 14-bit displacements in integer loads and stores,
5735 we need to prevent reload from generating out of range integer mode
5736 loads and stores to the floating point registers. Previously, we
5737 used to call for a secondary reload and have emit_move_sequence()
5738 fix the instruction sequence. However, reload occasionally wouldn't
5739 generate the reload and we would end up with an invalid REG+D memory
5740 address. So, now we use an intermediate general register for most
5741 memory loads and stores. */
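/* Illustration (register and displacement hypothetical): an integer
   load such as "ldw -8192(%r30),%r19" fits the signed 14-bit
   displacement range of -8192..8191, but a floating-point load with
   the same displacement generally cannot be encoded on pre-2.0
   processors, so an SImode value bound for an FP register is loaded
   into a general register first.  */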
5742 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5743 && GET_MODE_CLASS (mode) == MODE_INT
5744 && FP_REG_CLASS_P (rclass))
/* Reload passes (mem:SI (reg/f:DI 30 %r30)) when it wants to check
   the secondary reload needed for a pseudo.  It never passes a
   REG+D address.  */
5749 if (GET_CODE (x) == MEM)
5753 /* We don't need an intermediate for indexed and LO_SUM DLT
5754 memory addresses. When INT14_OK_STRICT is true, it might
5755 appear that we could directly allow register indirect
5756 memory addresses. However, this doesn't work because we
5757 don't support SUBREGs in floating-point register copies
5758 and reload doesn't tell us when it's going to use a SUBREG. */
5759 if (IS_INDEX_ADDR_P (x)
5760 || IS_LO_SUM_DLT_ADDR_P (x))
5763 /* Otherwise, we need an intermediate general register. */
5764 return GENERAL_REGS;
/* Request a secondary reload with a general scratch register
   for everything else.  ??? Could symbolic operands be handled
   directly when generating non-pic PA 2.0 code?  */
5770 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5774 /* We need a secondary register (GPR) for copies between the SAR
5775 and anything other than a general register. */
5776 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5778 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5782 /* A SAR<->FP register copy requires a secondary register (GPR) as
5783 well as secondary memory. */
5784 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5785 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5786 && FP_REG_CLASS_P (rclass)))
5788 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
/* Secondary reloads of symbolic operands require %r1 as a scratch
   register when we're generating PIC code and when the operand isn't
   readonly.  */
5795 if (GET_CODE (x) == HIGH)
/* Profiling has shown that GCC spends about 2.6% of its compilation
   time in symbolic_operand from calls inside pa_secondary_reload_class.
   So, we use an inline copy to avoid useless work.  */
5801 switch (GET_CODE (x))
5806 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5813 is_symbolic = (GET_CODE (op) == PLUS
5814 && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5815 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5816 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5817 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5824 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5826 gcc_assert (mode == SImode || mode == DImode);
5827 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5828 : CODE_FOR_reload_indi_r1);
5834 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5835 is only marked as live on entry by df-scan when it is a fixed
5836 register. It isn't a fixed register in the 64-bit runtime,
5837 so we need to mark it here. */
5840 pa_extra_live_on_entry (bitmap regs)
5843 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5846 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5847 to prevent it from being deleted. */
5850 pa_eh_return_handler_rtx (void)
5854 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5855 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5856 tmp = gen_rtx_MEM (word_mode, tmp);
5861 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5862 by invisible reference. As a GCC extension, we also pass anything
5863 with a zero or variable size by reference.
5865 The 64-bit runtime does not describe passing any types by invisible
5866 reference. The internals of GCC can't currently handle passing
5867 empty structures, and zero or variable length arrays when they are
5868 not passed entirely on the stack or by reference. Thus, as a GCC
5869 extension, we pass these types by reference. The HP compiler doesn't
5870 support these types, so hopefully there shouldn't be any compatibility
5871 issues. This may have to be revisited when HP releases a C99 compiler
5872 or updates the ABI. */
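/* Illustration of the size test below: a 12-byte struct (size > 8),
   an empty struct (size == 0), and a variable-length array (size < 0
   from int_size_in_bytes) are all passed by reference, while a plain
   double (size == 8) is passed by value.  */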
5875 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5876 enum machine_mode mode, const_tree type,
5877 bool named ATTRIBUTE_UNUSED)
5882 size = int_size_in_bytes (type);
5884 size = GET_MODE_SIZE (mode);
5889 return size <= 0 || size > 8;
5893 function_arg_padding (enum machine_mode mode, const_tree type)
5898 && (AGGREGATE_TYPE_P (type)
5899 || TREE_CODE (type) == COMPLEX_TYPE
5900 || TREE_CODE (type) == VECTOR_TYPE)))
5902 /* Return none if justification is not required. */
5904 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5905 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5908 /* The directions set here are ignored when a BLKmode argument larger
5909 than a word is placed in a register. Different code is used for
5910 the stack and registers. This makes it difficult to have a
5911 consistent data representation for both the stack and registers.
5912 For both runtimes, the justification and padding for arguments on
5913 the stack and in registers should be identical. */
5915 /* The 64-bit runtime specifies left justification for aggregates. */
5918 /* The 32-bit runtime architecture specifies right justification.
5919 When the argument is passed on the stack, the argument is padded
5920 with garbage on the left. The HP compiler pads with zeros. */
5924 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5931 /* Do what is necessary for `va_start'. We look at the current function
5932 to determine if stdargs or varargs is used and fill in an initial
5933 va_list. A pointer to this constructor is returned. */
5936 hppa_builtin_saveregs (void)
5939 tree fntype = TREE_TYPE (current_function_decl);
5940 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5941 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5942 != void_type_node)))
5943 ? UNITS_PER_WORD : 0);
5946 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5948 offset = crtl->args.arg_offset_rtx;
5954 /* Adjust for varargs/stdarg differences. */
5956 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5958 offset = crtl->args.arg_offset_rtx;
5960 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5961 from the incoming arg pointer and growing to larger addresses. */
5962 for (i = 26, off = -64; i >= 19; i--, off += 8)
5963 emit_move_insn (gen_rtx_MEM (word_mode,
5964 plus_constant (arg_pointer_rtx, off)),
5965 gen_rtx_REG (word_mode, i));
5967 /* The incoming args pointer points just beyond the flushback area;
5968 normally this is not a serious concern. However, when we are doing
5969 varargs/stdargs we want to make the arg pointer point to the start
5970 of the incoming argument area. */
5971 emit_move_insn (virtual_incoming_args_rtx,
5972 plus_constant (arg_pointer_rtx, -64));
5974 /* Now return a pointer to the first anonymous argument. */
5975 return copy_to_reg (expand_binop (Pmode, add_optab,
5976 virtual_incoming_args_rtx,
5977 offset, 0, 0, OPTAB_LIB_WIDEN));
5980 /* Store general registers on the stack. */
5981 dest = gen_rtx_MEM (BLKmode,
5982 plus_constant (crtl->args.internal_arg_pointer,
5984 set_mem_alias_set (dest, get_varargs_alias_set ());
5985 set_mem_align (dest, BITS_PER_WORD);
5986 move_block_from_reg (23, dest, 4);
5988 /* move_block_from_reg will emit code to store the argument registers
5989 individually as scalar stores.
5991 However, other insns may later load from the same addresses for
5992 a structure load (passing a struct to a varargs routine).
5994 The alias code assumes that such aliasing can never happen, so we
5995 have to keep memory referencing insns from moving up beyond the
5996 last argument register store. So we emit a blockage insn here. */
5997 emit_insn (gen_blockage ());
5999 return copy_to_reg (expand_binop (Pmode, add_optab,
6000 crtl->args.internal_arg_pointer,
6001 offset, 0, 0, OPTAB_LIB_WIDEN));
6005 hppa_va_start (tree valist, rtx nextarg)
6007 nextarg = expand_builtin_saveregs ();
6008 std_expand_builtin_va_start (valist, nextarg);
6012 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6017 /* Args grow upward. We can use the generic routines. */
6018 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6020 else /* !TARGET_64BIT */
6022 tree ptr = build_pointer_type (type);
6025 unsigned int size, ofs;
6028 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6032 ptr = build_pointer_type (type);
6034 size = int_size_in_bytes (type);
6035 valist_type = TREE_TYPE (valist);
6037 /* Args grow down. Not handled by generic routines. */
6039 u = fold_convert (sizetype, size_in_bytes (type));
6040 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6041 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6043 /* Align to 4 or 8 byte boundary depending on argument size. */
6045 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6046 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6047 t = fold_convert (valist_type, t);
6049 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6051 ofs = (8 - size) % 4;
6055 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6058 t = fold_convert (ptr, t);
6059 t = build_va_arg_indirect_ref (t);
6062 t = build_va_arg_indirect_ref (t);
6068 /* True if MODE is valid for the target. By "valid", we mean able to
6069 be manipulated in non-trivial ways. In particular, this means all
6070 the arithmetic is supported.
6072 Currently, TImode is not valid as the HP 64-bit runtime documentation
6073 doesn't document the alignment and calling conventions for this type.
Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6078 pa_scalar_mode_supported_p (enum machine_mode mode)
6080 int precision = GET_MODE_PRECISION (mode);
6082 switch (GET_MODE_CLASS (mode))
6084 case MODE_PARTIAL_INT:
6086 if (precision == CHAR_TYPE_SIZE)
6088 if (precision == SHORT_TYPE_SIZE)
6090 if (precision == INT_TYPE_SIZE)
6092 if (precision == LONG_TYPE_SIZE)
6094 if (precision == LONG_LONG_TYPE_SIZE)
6099 if (precision == FLOAT_TYPE_SIZE)
6101 if (precision == DOUBLE_TYPE_SIZE)
6103 if (precision == LONG_DOUBLE_TYPE_SIZE)
6107 case MODE_DECIMAL_FLOAT:
6115 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6116 it branches to the next real instruction. Otherwise, return FALSE. */
6119 branch_to_delay_slot_p (rtx insn)
6121 if (dbr_sequence_length ())
6124 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6127 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6129 This occurs when INSN has an unfilled delay slot and is followed
6130 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6131 the jump branches into the delay slot. So, we add a nop in the delay
6132 slot just to be safe. This messes up our instruction count, but we
6133 don't know how big the ASM_INPUT insn is anyway. */
6136 branch_needs_nop_p (rtx insn)
6140 if (dbr_sequence_length ())
6143 next_insn = next_real_insn (insn);
6144 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6147 /* This routine handles all the normal conditional branch sequences we
6148 might need to generate. It handles compare immediate vs compare
6149 register, nullification of delay slots, varying length branches,
6150 negated branches, and all combinations of the above. It returns the
output appropriate to emit the branch corresponding to all given
parameters.  */
6155 output_cbranch (rtx *operands, int negated, rtx insn)
6157 static char buf[100];
6159 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6160 int length = get_attr_length (insn);
6163 /* A conditional branch to the following instruction (e.g. the delay slot)
6164 is asking for a disaster. This can happen when not optimizing and
6165 when jump optimization fails.
6167 While it is usually safe to emit nothing, this can fail if the
6168 preceding instruction is a nullified branch with an empty delay
6169 slot and the same branch target as this branch. We could check
6170 for this but jump optimization should eliminate nop jumps. It
6171 is always safe to emit a nop. */
6172 if (branch_to_delay_slot_p (insn))
6175 /* The doubleword form of the cmpib instruction doesn't have the LEU
6176 and GTU conditions while the cmpb instruction does. Since we accept
6177 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6178 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6179 operands[2] = gen_rtx_REG (DImode, 0);
6180 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6181 operands[1] = gen_rtx_REG (DImode, 0);
6183 /* If this is a long branch with its delay slot unfilled, set `nullify'
6184 as it can nullify the delay slot and save a nop. */
6185 if (length == 8 && dbr_sequence_length () == 0)
6188 /* If this is a short forward conditional branch which did not get
6189 its delay slot filled, the delay slot can still be nullified. */
6190 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6191 nullify = forward_branch_p (insn);
6193 /* A forward branch over a single nullified insn can be done with a
6194 comclr instruction. This avoids a single cycle penalty due to
6195 mis-predicted branch if we fall through (branch not taken). */
6197 && next_real_insn (insn) != 0
6198 && get_attr_length (next_real_insn (insn)) == 4
6199 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
/* All short conditional branches except backwards with an unfilled
   delay slot.  */
6209 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6211 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6212 if (GET_MODE (operands[1]) == DImode)
6215 strcat (buf, "%B3");
6217 strcat (buf, "%S3");
6219 strcat (buf, " %2,%r1,%%r0");
6222 if (branch_needs_nop_p (insn))
6223 strcat (buf, ",n %2,%r1,%0%#");
6225 strcat (buf, ",n %2,%r1,%0");
6228 strcat (buf, " %2,%r1,%0");
6231 /* All long conditionals. Note a short backward branch with an
6232 unfilled delay slot is treated just like a long backward branch
6233 with an unfilled delay slot. */
6235 /* Handle weird backwards branch with a filled delay slot
6236 which is nullified. */
6237 if (dbr_sequence_length () != 0
6238 && ! forward_branch_p (insn)
6241 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6242 if (GET_MODE (operands[1]) == DImode)
6245 strcat (buf, "%S3");
6247 strcat (buf, "%B3");
6248 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6250 /* Handle short backwards branch with an unfilled delay slot.
6251 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6252 taken and untaken branches. */
6253 else if (dbr_sequence_length () == 0
6254 && ! forward_branch_p (insn)
6255 && INSN_ADDRESSES_SET_P ()
6256 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6257 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6259 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6260 if (GET_MODE (operands[1]) == DImode)
6263 strcat (buf, "%B3 %2,%r1,%0%#");
6265 strcat (buf, "%S3 %2,%r1,%0%#");
6269 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6270 if (GET_MODE (operands[1]) == DImode)
6273 strcat (buf, "%S3");
6275 strcat (buf, "%B3");
6277 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6279 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6284 /* The reversed conditional branch must branch over one additional
6285 instruction if the delay slot is filled and needs to be extracted
6286 by output_lbranch. If the delay slot is empty or this is a
6287 nullified forward branch, the instruction after the reversed
6288 condition branch must be nullified. */
6289 if (dbr_sequence_length () == 0
6290 || (nullify && forward_branch_p (insn)))
6294 operands[4] = GEN_INT (length);
6299 operands[4] = GEN_INT (length + 4);
6302 /* Create a reversed conditional branch which branches around
6303 the following insns. */
6304 if (GET_MODE (operands[1]) != DImode)
6310 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6313 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6319 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6322 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6331 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6334 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6340 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6343 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6347 output_asm_insn (buf, operands);
6348 return output_lbranch (operands[0], insn, xdelay);
6353 /* This routine handles output of long unconditional branches that
6354 exceed the maximum range of a simple branch instruction. Since
6355 we don't have a register available for the branch, we save register
6356 %r1 in the frame marker, load the branch destination DEST into %r1,
6357 execute the branch, and restore %r1 in the delay slot of the branch.
6359 Since long branches may have an insn in the delay slot and the
6360 delay slot is used to restore %r1, we in general need to extract
6361 this insn and execute it before the branch. However, to facilitate
6362 use of this function by conditional branches, we also provide an
6363 option to not extract the delay insn so that it will be emitted
6364 after the long branch. So, if there is an insn in the delay slot,
6365 it is extracted if XDELAY is nonzero.
6367 The lengths of the various long-branch sequences are 20, 16 and 24
6368 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
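/* A sketch of the 16-byte non-PIC sequence emitted below, assuming a
   frameless 32-bit function so that the "Current RP" slot is free;
   "dest" stands for the actual branch target:

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'dest,%r1		; load the destination into %r1
	be R'dest(%sr4,%r1)	; branch
	ldw -20(%r30),%r1	; delay slot: restore %r1  */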
6371 output_lbranch (rtx dest, rtx insn, int xdelay)
6375 xoperands[0] = dest;
6377 /* First, free up the delay slot. */
6378 if (xdelay && dbr_sequence_length () != 0)
6380 /* We can't handle a jump in the delay slot. */
6381 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6383 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6386 /* Now delete the delay insn. */
6387 SET_INSN_DELETED (NEXT_INSN (insn));
6390 /* Output an insn to save %r1. The runtime documentation doesn't
6391 specify whether the "Clean Up" slot in the callers frame can
6392 be clobbered by the callee. It isn't copied by HP's builtin
6393 alloca, so this suggests that it can be clobbered if necessary.
6394 The "Static Link" location is copied by HP builtin alloca, so
6395 we avoid using it. Using the cleanup slot might be a problem
6396 if we have to interoperate with languages that pass cleanup
6397 information. However, it should be possible to handle these
6398 situations with GCC's asm feature.
6400 The "Current RP" slot is reserved for the called procedure, so
6401 we try to use it when we don't have a frame of our own. It's
rather unlikely that we won't have a frame when we need to emit
a very long branch.
6405 Really the way to go long term is a register scavenger; goto
6406 the target of the jump and find a register which we can use
6407 as a scratch to hold the value in %r1. Then, we wouldn't have
6408 to free up the delay slot or clobber a slot that may be needed
6409 for other purposes. */
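/* In summary, the code below saves %r1 at -16(%r30) (64-bit) or
   -20(%r30) (32-bit) when the return pointer slot is free, and at
   -40(%r30) (64-bit) or -12(%r30) (32-bit) otherwise.  */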
6412 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6413 /* Use the return pointer slot in the frame marker. */
6414 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6416 /* Use the slot at -40 in the frame marker since HP builtin
6417 alloca doesn't copy it. */
6418 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6422 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6423 /* Use the return pointer slot in the frame marker. */
6424 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6426 /* Use the "Clean Up" slot in the frame marker. In GCC,
6427 the only other use of this location is for copying a
6428 floating point double argument from a floating-point
6429 register to two general registers. The copy is done
6430 as an "atomic" operation when outputting a call, so it
6431 won't interfere with our using the location here. */
6432 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6435 if (TARGET_PORTABLE_RUNTIME)
6437 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6438 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6439 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6443 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6444 if (TARGET_SOM || !TARGET_GAS)
6446 xoperands[1] = gen_label_rtx ();
6447 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6448 targetm.asm_out.internal_label (asm_out_file, "L",
6449 CODE_LABEL_NUMBER (xoperands[1]));
6450 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6454 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6455 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6457 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6460 /* Now output a very long branch to the original target. */
6461 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6463 /* Now restore the value of %r1 in the delay slot. */
6466 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6467 return "ldd -16(%%r30),%%r1";
6469 return "ldd -40(%%r30),%%r1";
6473 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6474 return "ldw -20(%%r30),%%r1";
6476 return "ldw -12(%%r30),%%r1";
6480 /* This routine handles all the branch-on-bit conditional branch sequences we
6481 might need to generate. It handles nullification of delay slots,
6482 varying length branches, negated branches and all combinations of the
6483 above.  It returns the appropriate output template to emit the branch. */
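/* For reference, a branch-on-bit instruction has the general form

	bb,cond %reg,bit,target

   branching to TARGET when bit BIT of %REG satisfies COND ("<" tests
   for a 1 bit, ">=" for a 0 bit).  */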
6486 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6488 static char buf[100];
6490 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6491 int length = get_attr_length (insn);
6494 /* A conditional branch to the following instruction (e.g. the delay slot) is
6495 asking for a disaster. I do not think this can happen as this pattern
6496 is only used when optimizing; jump optimization should eliminate the
6497 jump. But be prepared just in case. */
6499 if (branch_to_delay_slot_p (insn))
6502 /* If this is a long branch with its delay slot unfilled, set `nullify'
6503 as it can nullify the delay slot and save a nop. */
6504 if (length == 8 && dbr_sequence_length () == 0)
6507 /* If this is a short forward conditional branch which did not get
6508 its delay slot filled, the delay slot can still be nullified. */
6509 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6510 nullify = forward_branch_p (insn);
6512 /* A forward branch over a single nullified insn can be done with an
6513 extrs instruction.  This avoids a single cycle penalty due to a
6514 mis-predicted branch if we fall through (branch not taken). */
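  /* In that case, instead of a branch the code below emits something
     like

	extrw,s,cond %reg,bit,1,%r0

     relying on the extract's condition completer to nullify (skip)
     the single insn that follows.  */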
6517 && next_real_insn (insn) != 0
6518 && get_attr_length (next_real_insn (insn)) == 4
6519 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6526 /* All short conditional branches except backwards with an unfilled
6527 delay slot. */
6530 strcpy (buf, "{extrs,|extrw,s,}");
6532 strcpy (buf, "bb,");
6533 if (useskip && GET_MODE (operands[0]) == DImode)
6534 strcpy (buf, "extrd,s,*");
6535 else if (GET_MODE (operands[0]) == DImode)
6536 strcpy (buf, "bb,*");
6537 if ((which == 0 && negated)
6538 || (which == 1 && ! negated))
6543 strcat (buf, " %0,%1,1,%%r0");
6544 else if (nullify && negated)
6546 if (branch_needs_nop_p (insn))
6547 strcat (buf, ",n %0,%1,%3%#");
6549 strcat (buf, ",n %0,%1,%3");
6551 else if (nullify && ! negated)
6553 if (branch_needs_nop_p (insn))
6554 strcat (buf, ",n %0,%1,%2%#");
6556 strcat (buf, ",n %0,%1,%2");
6558 else if (! nullify && negated)
6559 strcat (buf, " %0,%1,%3");
6560 else if (! nullify && ! negated)
6561 strcat (buf, " %0,%1,%2");
6564 /* All long conditionals. Note a short backward branch with an
6565 unfilled delay slot is treated just like a long backward branch
6566 with an unfilled delay slot. */
6568 /* Handle weird backwards branch with a filled delay slot
6569 which is nullified. */
6570 if (dbr_sequence_length () != 0
6571 && ! forward_branch_p (insn)
6574 strcpy (buf, "bb,");
6575 if (GET_MODE (operands[0]) == DImode)
6577 if ((which == 0 && negated)
6578 || (which == 1 && ! negated))
6583 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6585 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6587 /* Handle short backwards branch with an unfilled delay slot.
6588 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6589 taken and untaken branches. */
6590 else if (dbr_sequence_length () == 0
6591 && ! forward_branch_p (insn)
6592 && INSN_ADDRESSES_SET_P ()
6593 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6594 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6596 strcpy (buf, "bb,");
6597 if (GET_MODE (operands[0]) == DImode)
6599 if ((which == 0 && negated)
6600 || (which == 1 && ! negated))
6605 strcat (buf, " %0,%1,%3%#");
6607 strcat (buf, " %0,%1,%2%#");
6611 if (GET_MODE (operands[0]) == DImode)
6612 strcpy (buf, "extrd,s,*");
6614 strcpy (buf, "{extrs,|extrw,s,}");
6615 if ((which == 0 && negated)
6616 || (which == 1 && ! negated))
6620 if (nullify && negated)
6621 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6622 else if (nullify && ! negated)
6623 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6625 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6627 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6632 /* The reversed conditional branch must branch over one additional
6633 instruction if the delay slot is filled and needs to be extracted
6634 by output_lbranch. If the delay slot is empty or this is a
6635 nullified forward branch, the instruction after the reversed
6636 conditional branch must be nullified. */
6637 if (dbr_sequence_length () == 0
6638 || (nullify && forward_branch_p (insn)))
6642 operands[4] = GEN_INT (length);
6647 operands[4] = GEN_INT (length + 4);
6650 if (GET_MODE (operands[0]) == DImode)
6651 strcpy (buf, "bb,*");
6653 strcpy (buf, "bb,");
6654 if ((which == 0 && negated)
6655 || (which == 1 && !negated))
6660 strcat (buf, ",n %0,%1,.+%4");
6662 strcat (buf, " %0,%1,.+%4");
6663 output_asm_insn (buf, operands);
6664 return output_lbranch (negated ? operands[3] : operands[2],
6670 /* This routine handles all the branch-on-variable-bit conditional branch
6671 sequences we might need to generate. It handles nullification of delay
6672 slots, varying length branches, negated branches and all combinations
6673 of the above.  It returns the appropriate output template to emit the
6674 branch. */
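/* A branch-on-variable-bit test is like bb above except that the bit
   position comes from the shift amount register %sar rather than an
   immediate (bvb on PA 1.x; bb with a %sar operand on PA 2.0, hence
   the {bvb,|bb,} templates below).  */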
6677 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6679 static char buf[100];
6681 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6682 int length = get_attr_length (insn);
6685 /* A conditional branch to the following instruction (e.g. the delay slot) is
6686 asking for a disaster. I do not think this can happen as this pattern
6687 is only used when optimizing; jump optimization should eliminate the
6688 jump. But be prepared just in case. */
6690 if (branch_to_delay_slot_p (insn))
6693 /* If this is a long branch with its delay slot unfilled, set `nullify'
6694 as it can nullify the delay slot and save a nop. */
6695 if (length == 8 && dbr_sequence_length () == 0)
6698 /* If this is a short forward conditional branch which did not get
6699 its delay slot filled, the delay slot can still be nullified. */
6700 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6701 nullify = forward_branch_p (insn);
6703 /* A forward branch over a single nullified insn can be done with an
6704 extrs instruction.  This avoids a single cycle penalty due to a
6705 mis-predicted branch if we fall through (branch not taken). */
6708 && next_real_insn (insn) != 0
6709 && get_attr_length (next_real_insn (insn)) == 4
6710 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6717 /* All short conditional branches except backwards with an unfilled
6718 delay slot. */
6721 strcpy (buf, "{vextrs,|extrw,s,}");
6723 strcpy (buf, "{bvb,|bb,}");
6724 if (useskip && GET_MODE (operands[0]) == DImode)
6725 strcpy (buf, "extrd,s,*");
6726 else if (GET_MODE (operands[0]) == DImode)
6727 strcpy (buf, "bb,*");
6728 if ((which == 0 && negated)
6729 || (which == 1 && ! negated))
6734 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6735 else if (nullify && negated)
6737 if (branch_needs_nop_p (insn))
6738 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6740 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6742 else if (nullify && ! negated)
6744 if (branch_needs_nop_p (insn))
6745 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6747 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6749 else if (! nullify && negated)
6750 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6751 else if (! nullify && ! negated)
6752 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6755 /* All long conditionals. Note a short backward branch with an
6756 unfilled delay slot is treated just like a long backward branch
6757 with an unfilled delay slot. */
6759 /* Handle weird backwards branch with a filled delay slot
6760 which is nullified. */
6761 if (dbr_sequence_length () != 0
6762 && ! forward_branch_p (insn)
6765 strcpy (buf, "{bvb,|bb,}");
6766 if (GET_MODE (operands[0]) == DImode)
6768 if ((which == 0 && negated)
6769 || (which == 1 && ! negated))
6774 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6776 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6778 /* Handle short backwards branch with an unfilled delay slot.
6779 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6780 taken and untaken branches. */
6781 else if (dbr_sequence_length () == 0
6782 && ! forward_branch_p (insn)
6783 && INSN_ADDRESSES_SET_P ()
6784 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6785 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6787 strcpy (buf, "{bvb,|bb,}");
6788 if (GET_MODE (operands[0]) == DImode)
6790 if ((which == 0 && negated)
6791 || (which == 1 && ! negated))
6796 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6798 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6802 strcpy (buf, "{vextrs,|extrw,s,}");
6803 if (GET_MODE (operands[0]) == DImode)
6804 strcpy (buf, "extrd,s,*");
6805 if ((which == 0 && negated)
6806 || (which == 1 && ! negated))
6810 if (nullify && negated)
6811 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6812 else if (nullify && ! negated)
6813 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6815 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6817 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6822 /* The reversed conditional branch must branch over one additional
6823 instruction if the delay slot is filled and needs to be extracted
6824 by output_lbranch. If the delay slot is empty or this is a
6825 nullified forward branch, the instruction after the reversed
6826 conditional branch must be nullified. */
6827 if (dbr_sequence_length () == 0
6828 || (nullify && forward_branch_p (insn)))
6832 operands[4] = GEN_INT (length);
6837 operands[4] = GEN_INT (length + 4);
6840 if (GET_MODE (operands[0]) == DImode)
6841 strcpy (buf, "bb,*");
6843 strcpy (buf, "{bvb,|bb,}");
6844 if ((which == 0 && negated)
6845 || (which == 1 && !negated))
6850 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6852 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6853 output_asm_insn (buf, operands);
6854 return output_lbranch (negated ? operands[3] : operands[2],
6860 /* Return the output template for emitting a dbra type insn.
6862 Note it may perform some output operations on its own before
6863 returning the final output string. */
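/* The core of a dbra sequence is an add-immediate-and-branch

	addib,cond imm,%reg,target

   which adds IMM to %REG and branches to TARGET if the result
   satisfies COND, as in the templates below.  */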
6865 output_dbra (rtx *operands, rtx insn, int which_alternative)
6867 int length = get_attr_length (insn);
6869 /* A conditional branch to the following instruction (e.g. the delay slot) is
6870 asking for a disaster. Be prepared! */
6872 if (branch_to_delay_slot_p (insn))
6874 if (which_alternative == 0)
6875 return "ldo %1(%0),%0";
6876 else if (which_alternative == 1)
6878 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6879 output_asm_insn ("ldw -16(%%r30),%4", operands);
6880 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6881 return "{fldws|fldw} -16(%%r30),%0";
6885 output_asm_insn ("ldw %0,%4", operands);
6886 return "ldo %1(%4),%4\n\tstw %4,%0";
6890 if (which_alternative == 0)
6892 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6895 /* If this is a long branch with its delay slot unfilled, set `nullify'
6896 as it can nullify the delay slot and save a nop. */
6897 if (length == 8 && dbr_sequence_length () == 0)
6900 /* If this is a short forward conditional branch which did not get
6901 its delay slot filled, the delay slot can still be nullified. */
6902 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6903 nullify = forward_branch_p (insn);
6910 if (branch_needs_nop_p (insn))
6911 return "addib,%C2,n %1,%0,%3%#";
6913 return "addib,%C2,n %1,%0,%3";
6916 return "addib,%C2 %1,%0,%3";
6919 /* Handle weird backwards branch with a filled delay slot
6920 which is nullified. */
6921 if (dbr_sequence_length () != 0
6922 && ! forward_branch_p (insn)
6924 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6925 /* Handle short backwards branch with an unfilled delay slot.
6926 Using an addb;nop rather than addi;bl saves 1 cycle for both
6927 taken and untaken branches. */
6928 else if (dbr_sequence_length () == 0
6929 && ! forward_branch_p (insn)
6930 && INSN_ADDRESSES_SET_P ()
6931 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6932 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6933 return "addib,%C2 %1,%0,%3%#";
6935 /* Handle normal cases. */
6937 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6939 return "addi,%N2 %1,%0,%0\n\tb %3";
6942 /* The reversed conditional branch must branch over one additional
6943 instruction if the delay slot is filled and needs to be extracted
6944 by output_lbranch. If the delay slot is empty or this is a
6945 nullified forward branch, the instruction after the reversed
6946 conditional branch must be nullified. */
6947 if (dbr_sequence_length () == 0
6948 || (nullify && forward_branch_p (insn)))
6952 operands[4] = GEN_INT (length);
6957 operands[4] = GEN_INT (length + 4);
6961 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6963 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6965 return output_lbranch (operands[3], insn, xdelay);
6969 /* Deal with gross reload from FP register case. */
6970 else if (which_alternative == 1)
6972 /* Move loop counter from FP register to MEM then into a GR,
6973 increment the GR, store the GR into MEM, and finally reload
6974 the FP register from MEM from within the branch's delay slot. */
6975 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6977 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6979 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6980 else if (length == 28)
6981 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6984 operands[5] = GEN_INT (length - 16);
6985 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6986 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6987 return output_lbranch (operands[3], insn, 0);
6990 /* Deal with gross reload from memory case. */
6993 /* Reload loop counter from memory; the store back to memory
6994 happens in the branch's delay slot. */
6995 output_asm_insn ("ldw %0,%4", operands);
6997 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6998 else if (length == 16)
6999 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7002 operands[5] = GEN_INT (length - 4);
7003 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7004 return output_lbranch (operands[3], insn, 0);
7009 /* Return the output template for emitting a movb type insn.
7011 Note it may perform some output operations on its own before
7012 returning the final output string. */
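/* Similarly, the core of a movb sequence is a move-and-branch

	movb,cond %r1,%r2,target

   which copies %R1 into %R2 and branches to TARGET if the moved
   value satisfies COND.  */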
7014 output_movb (rtx *operands, rtx insn, int which_alternative,
7015 int reverse_comparison)
7017 int length = get_attr_length (insn);
7019 /* A conditional branch to the following instruction (e.g. the delay slot) is
7020 asking for a disaster. Be prepared! */
7022 if (branch_to_delay_slot_p (insn))
7024 if (which_alternative == 0)
7025 return "copy %1,%0";
7026 else if (which_alternative == 1)
7028 output_asm_insn ("stw %1,-16(%%r30)", operands);
7029 return "{fldws|fldw} -16(%%r30),%0";
7031 else if (which_alternative == 2)
7037 /* Support the second variant. */
7038 if (reverse_comparison)
7039 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7041 if (which_alternative == 0)
7043 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7046 /* If this is a long branch with its delay slot unfilled, set `nullify'
7047 as it can nullify the delay slot and save a nop. */
7048 if (length == 8 && dbr_sequence_length () == 0)
7051 /* If this is a short forward conditional branch which did not get
7052 its delay slot filled, the delay slot can still be nullified. */
7053 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7054 nullify = forward_branch_p (insn);
7061 if (branch_needs_nop_p (insn))
7062 return "movb,%C2,n %1,%0,%3%#";
7064 return "movb,%C2,n %1,%0,%3";
7067 return "movb,%C2 %1,%0,%3";
7070 /* Handle weird backwards branch with a filled delay slot
7071 which is nullified. */
7072 if (dbr_sequence_length () != 0
7073 && ! forward_branch_p (insn)
7075 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7077 /* Handle short backwards branch with an unfilled delay slot.
7078 Using a movb;nop rather than or;bl saves 1 cycle for both
7079 taken and untaken branches. */
7080 else if (dbr_sequence_length () == 0
7081 && ! forward_branch_p (insn)
7082 && INSN_ADDRESSES_SET_P ()
7083 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7084 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7085 return "movb,%C2 %1,%0,%3%#";
7086 /* Handle normal cases. */
7088 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7090 return "or,%N2 %1,%%r0,%0\n\tb %3";
7093 /* The reversed conditional branch must branch over one additional
7094 instruction if the delay slot is filled and needs to be extracted
7095 by output_lbranch. If the delay slot is empty or this is a
7096 nullified forward branch, the instruction after the reversed
7097 conditional branch must be nullified. */
7098 if (dbr_sequence_length () == 0
7099 || (nullify && forward_branch_p (insn)))
7103 operands[4] = GEN_INT (length);
7108 operands[4] = GEN_INT (length + 4);
7112 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7114 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7116 return output_lbranch (operands[3], insn, xdelay);
7119 /* Deal with gross reload for FP destination register case. */
7120 else if (which_alternative == 1)
7122 /* Move source register to MEM, perform the branch test, then
7123 finally load the FP register from MEM from within the branch's
7124 delay slot. */
7125 output_asm_insn ("stw %1,-16(%%r30)", operands);
7127 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7128 else if (length == 16)
7129 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7132 operands[4] = GEN_INT (length - 4);
7133 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7134 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7135 return output_lbranch (operands[3], insn, 0);
7138 /* Deal with gross reload from memory case. */
7139 else if (which_alternative == 2)
7141 /* Reload loop counter from memory; the store back to memory
7142 happens in the branch's delay slot. */
7144 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7145 else if (length == 12)
7146 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7149 operands[4] = GEN_INT (length);
7150 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7152 return output_lbranch (operands[3], insn, 0);
7155 /* Handle SAR as a destination. */
7159 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7160 else if (length == 12)
7161 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7164 operands[4] = GEN_INT (length);
7165 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7167 return output_lbranch (operands[3], insn, 0);
7172 /* Copy any FP arguments in INSN into integer registers. */
7174 copy_fp_args (rtx insn)
7179 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7181 int arg_mode, regno;
7182 rtx use = XEXP (link, 0);
7184 if (! (GET_CODE (use) == USE
7185 && GET_CODE (XEXP (use, 0)) == REG
7186 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7189 arg_mode = GET_MODE (XEXP (use, 0));
7190 regno = REGNO (XEXP (use, 0));
7192 /* Is it a floating point register? */
7193 if (regno >= 32 && regno <= 39)
7195 /* Copy the FP register into an integer register via memory. */
7196 if (arg_mode == SFmode)
7198 xoperands[0] = XEXP (use, 0);
7199 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7200 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7201 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7205 xoperands[0] = XEXP (use, 0);
7206 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7207 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7208 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7209 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7215 /* Compute length of the FP argument copy sequence for INSN. */
7217 length_fp_args (rtx insn)
7222 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7224 int arg_mode, regno;
7225 rtx use = XEXP (link, 0);
7227 if (! (GET_CODE (use) == USE
7228 && GET_CODE (XEXP (use, 0)) == REG
7229 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7232 arg_mode = GET_MODE (XEXP (use, 0));
7233 regno = REGNO (XEXP (use, 0));
7235 /* Is it a floating point register? */
7236 if (regno >= 32 && regno <= 39)
7238 if (arg_mode == SFmode)
7248 /* Return the attribute length for the millicode call instruction INSN.
7249 The length must match the code generated by output_millicode_call.
7250 We include the delay slot in the returned length as it is better to
7251 overestimate the length than to underestimate it. */
7254 attr_length_millicode_call (rtx insn)
7256 unsigned long distance = -1;
7257 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7259 if (INSN_ADDRESSES_SET_P ())
7261 distance = (total + insn_current_reference_address (insn));
7262 if (distance < total)
7268 if (!TARGET_LONG_CALLS && distance < 7600000)
7273 else if (TARGET_PORTABLE_RUNTIME)
7277 if (!TARGET_LONG_CALLS && distance < 240000)
7280 if (TARGET_LONG_ABS_CALL && !flag_pic)
7287 /* INSN is a function call.  It may have an unconditional jump
7288 in its delay slot.
7290 CALL_DEST is the routine we are calling. */
7293 output_millicode_call (rtx insn, rtx call_dest)
7295 int attr_length = get_attr_length (insn);
7296 int seq_length = dbr_sequence_length ();
7301 xoperands[0] = call_dest;
7302 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7304 /* Handle the common case where we are sure that the branch will
7305 reach the beginning of the $CODE$ subspace.  The within-reach
7306 form of the $$sh_func_adrs call has a length of 28.  Because
7307 it has an attribute type of multi, it never has a nonzero
7308 sequence length.  The length of the $$sh_func_adrs call is the same
7309 as certain out-of-reach PIC calls to other routines. */
7310 if (!TARGET_LONG_CALLS
7311 && ((seq_length == 0
7312 && (attr_length == 12
7313 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7314 || (seq_length != 0 && attr_length == 8)))
7316 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7322 /* It might seem that one insn could be saved by accessing
7323 the millicode function using the linkage table. However,
7324 this doesn't work in shared libraries and other dynamically
7325 loaded objects. Using a pc-relative sequence also avoids
7326 problems related to the implicit use of the gp register. */
7327 output_asm_insn ("b,l .+8,%%r1", xoperands);
7331 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7332 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7336 xoperands[1] = gen_label_rtx ();
7337 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7338 targetm.asm_out.internal_label (asm_out_file, "L",
7339 CODE_LABEL_NUMBER (xoperands[1]));
7340 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7343 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7345 else if (TARGET_PORTABLE_RUNTIME)
7347 /* Pure portable runtime doesn't allow be/ble; we also don't
7348 have PIC support in the assembler/linker, so this sequence
7349 is needed. */
7351 /* Get the address of our target into %r1. */
7352 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7353 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7355 /* Get our return address into %r31. */
7356 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7357 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7359 /* Jump to our target address in %r1. */
7360 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7364 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7366 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7368 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7372 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7373 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7375 if (TARGET_SOM || !TARGET_GAS)
7377 /* The HP assembler can generate relocations for the
7378 difference of two symbols. GAS can do this for a
7379 millicode symbol but not an arbitrary external
7380 symbol when generating SOM output. */
7381 xoperands[1] = gen_label_rtx ();
7382 targetm.asm_out.internal_label (asm_out_file, "L",
7383 CODE_LABEL_NUMBER (xoperands[1]));
7384 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7385 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7389 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7390 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7394 /* Jump to our target address in %r1. */
7395 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7399 if (seq_length == 0)
7400 output_asm_insn ("nop", xoperands);
7402 /* We are done if there isn't a jump in the delay slot. */
7403 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7406 /* This call has an unconditional jump in its delay slot. */
7407 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7409 /* See if the return address can be adjusted. Use the containing
7410 sequence insn's address. */
7411 if (INSN_ADDRESSES_SET_P ())
7413 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7414 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7415 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7417 if (VAL_14_BITS_P (distance))
7419 xoperands[1] = gen_label_rtx ();
7420 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7421 targetm.asm_out.internal_label (asm_out_file, "L",
7422 CODE_LABEL_NUMBER (xoperands[1]));
7425 /* ??? This branch may not reach its target. */
7426 output_asm_insn ("nop\n\tb,n %0", xoperands);
7429 /* ??? This branch may not reach its target. */
7430 output_asm_insn ("nop\n\tb,n %0", xoperands);
7432 /* Delete the jump. */
7433 SET_INSN_DELETED (NEXT_INSN (insn));
7438 /* Return the attribute length of the call instruction INSN. The SIBCALL
7439 flag indicates whether INSN is a regular call or a sibling call. The
7440 length returned must be longer than the code actually generated by
7441 output_call. Since branch shortening is done before delay branch
7442 sequencing, there is no way to determine whether or not the delay
7443 slot will be filled during branch shortening. Even when the delay
7444 slot is filled, we may have to add a nop if the delay slot contains
7445 a branch that can't reach its target. Thus, we always have to include
7446 the delay slot in the length estimate. This used to be done in
7447 pa_adjust_insn_length but we do it here now as some sequences always
7448 fill the delay slot and we can save four bytes in the estimate for
7449 these sequences. */
7452 attr_length_call (rtx insn, int sibcall)
7455 rtx call, call_dest;
7458 rtx pat = PATTERN (insn);
7459 unsigned long distance = -1;
7461 gcc_assert (GET_CODE (insn) == CALL_INSN);
7463 if (INSN_ADDRESSES_SET_P ())
7465 unsigned long total;
7467 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7468 distance = (total + insn_current_reference_address (insn));
7469 if (distance < total)
7473 gcc_assert (GET_CODE (pat) == PARALLEL);
7475 /* Get the call rtx. */
7476 call = XVECEXP (pat, 0, 0);
7477 if (GET_CODE (call) == SET)
7478 call = SET_SRC (call);
7480 gcc_assert (GET_CODE (call) == CALL);
7482 /* Determine if this is a local call. */
7483 call_dest = XEXP (XEXP (call, 0), 0);
7484 call_decl = SYMBOL_REF_DECL (call_dest);
7485 local_call = call_decl && targetm.binds_local_p (call_decl);
7487 /* pc-relative branch. */
7488 if (!TARGET_LONG_CALLS
7489 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7490 || distance < 240000))
7493 /* 64-bit plabel sequence. */
7494 else if (TARGET_64BIT && !local_call)
7495 length += sibcall ? 28 : 24;
7497 /* non-pic long absolute branch sequence. */
7498 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7501 /* long pc-relative branch sequence. */
7502 else if (TARGET_LONG_PIC_SDIFF_CALL
7503 || (TARGET_GAS && !TARGET_SOM
7504 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7508 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7512 /* 32-bit plabel sequence. */
7518 length += length_fp_args (insn);
7528 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7536 /* INSN is a function call.  It may have an unconditional jump
7537 in its delay slot.
7539 CALL_DEST is the routine we are calling. */
7542 output_call (rtx insn, rtx call_dest, int sibcall)
7544 int delay_insn_deleted = 0;
7545 int delay_slot_filled = 0;
7546 int seq_length = dbr_sequence_length ();
7547 tree call_decl = SYMBOL_REF_DECL (call_dest);
7548 int local_call = call_decl && targetm.binds_local_p (call_decl);
7551 xoperands[0] = call_dest;
7553 /* Handle the common case where we're sure that the branch will reach
7554 the beginning of the "$CODE$" subspace. This is the beginning of
7555 the current function if we are in a named section. */
7556 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7558 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7559 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7563 if (TARGET_64BIT && !local_call)
7565 /* ??? As far as I can tell, the HP linker doesn't support the
7566 long pc-relative sequence described in the 64-bit runtime
7567 architecture. So, we use a slightly longer indirect call. */
7568 xoperands[0] = get_deferred_plabel (call_dest);
7569 xoperands[1] = gen_label_rtx ();
7571 /* If this isn't a sibcall, we put the load of %r27 into the
7572 delay slot. We can't do this in a sibcall as we don't
7573 have a second call-clobbered scratch register available. */
7575 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7578 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7581 /* Now delete the delay insn. */
7582 SET_INSN_DELETED (NEXT_INSN (insn));
7583 delay_insn_deleted = 1;
7586 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7587 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7588 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7592 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7593 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7594 output_asm_insn ("bve (%%r1)", xoperands);
7598 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7599 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7600 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7601 delay_slot_filled = 1;
7606 int indirect_call = 0;
7608 /* Emit a long call. There are several different sequences
7609 of increasing length and complexity. In most cases,
7610 they don't allow an instruction in the delay slot. */
7611 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7612 && !TARGET_LONG_PIC_SDIFF_CALL
7613 && !(TARGET_GAS && !TARGET_SOM
7614 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7619 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7623 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7625 /* A non-jump insn in the delay slot. By definition we can
7626 emit this insn before the call (and in fact before argument
7627 relocating). */
7628 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7631 /* Now delete the delay insn. */
7632 SET_INSN_DELETED (NEXT_INSN (insn));
7633 delay_insn_deleted = 1;
7636 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7638 /* This is the best sequence for making long calls in
7639 non-pic code. Unfortunately, GNU ld doesn't provide
7640 the stub needed for external calls, and GAS's support
7641 for this with the SOM linker is buggy. It is safe
7642 to use this for local calls. */
7643 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7645 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7649 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7652 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7654 output_asm_insn ("copy %%r31,%%r2", xoperands);
7655 delay_slot_filled = 1;
7660 if (TARGET_LONG_PIC_SDIFF_CALL)
7662 /* The HP assembler and linker can handle relocations
7663 for the difference of two symbols. The HP assembler
7664 recognizes the sequence as a pc-relative call and
7665 the linker provides stubs when needed. */
7666 xoperands[1] = gen_label_rtx ();
7667 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7668 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7669 targetm.asm_out.internal_label (asm_out_file, "L",
7670 CODE_LABEL_NUMBER (xoperands[1]));
7671 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7673 else if (TARGET_GAS && !TARGET_SOM
7674 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7676 /* GAS currently can't generate the relocations that
7677 are needed for the SOM linker under HP-UX using this
7678 sequence. The GNU linker doesn't generate the stubs
7679 that are needed for external calls on TARGET_ELF32
7680 with this sequence. For now, we have to use a
7681 longer plabel sequence when using GAS. */
7682 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7683 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7685 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7690 /* Emit a long plabel-based call sequence. This is
7691 essentially an inline implementation of $$dyncall.
7692 We don't actually try to call $$dyncall as this is
7693 as difficult as calling the function itself. */
7694 xoperands[0] = get_deferred_plabel (call_dest);
7695 xoperands[1] = gen_label_rtx ();
7697 /* Since the call is indirect, FP arguments in registers
7698 need to be copied to the general registers. Then, the
7699 argument relocation stub will copy them back. */
7701 copy_fp_args (insn);
7705 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7706 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7707 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7711 output_asm_insn ("addil LR'%0-$global$,%%r27",
7713 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7717 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7718 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7719 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7720 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7722 if (!sibcall && !TARGET_PA_20)
7724 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7725 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7726 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7728 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7735 output_asm_insn ("bve (%%r1)", xoperands);
7740 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7741 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7742 delay_slot_filled = 1;
7745 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7750 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7751 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7756 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7757 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7759 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7763 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7764 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7766 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7769 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7771 output_asm_insn ("copy %%r31,%%r2", xoperands);
7772 delay_slot_filled = 1;
7779 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7780 output_asm_insn ("nop", xoperands);
7782 /* We are done if there isn't a jump in the delay slot. */
7784 || delay_insn_deleted
7785 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7788 /* A sibcall should never have a branch in the delay slot. */
7789 gcc_assert (!sibcall);
7791 /* This call has an unconditional jump in its delay slot. */
7792 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7794 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7796 /* See if the return address can be adjusted.  Use the containing
7797 sequence insn's address.  This would break the regular call/return
7798 relationship assumed by the table-based eh unwinder, so only do that
7799 if the call is not possibly throwing. */
7800 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7801 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7802 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7804 if (VAL_14_BITS_P (distance)
7805 && !(can_throw_internal (insn) || can_throw_external (insn)))
7807 xoperands[1] = gen_label_rtx ();
7808 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7809 targetm.asm_out.internal_label (asm_out_file, "L",
7810 CODE_LABEL_NUMBER (xoperands[1]));
7813 output_asm_insn ("nop\n\tb,n %0", xoperands);
7816 output_asm_insn ("b,n %0", xoperands);
7818 /* Delete the jump. */
7819 SET_INSN_DELETED (NEXT_INSN (insn));
7824 /* Return the attribute length of the indirect call instruction INSN.
7825 The length must match the code generated by output_indirect_call.
7826 The returned length includes the delay slot. Currently, the delay
7827 slot of an indirect call sequence is not exposed and it is used by
7828 the sequence itself. */
7831 attr_length_indirect_call (rtx insn)
7833 unsigned long distance = -1;
7834 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7836 if (INSN_ADDRESSES_SET_P ())
7838 distance = (total + insn_current_reference_address (insn));
7839 if (distance < total)
7846 if (TARGET_FAST_INDIRECT_CALLS
7847 || (!TARGET_PORTABLE_RUNTIME
7848 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7849 || distance < 240000)))
7855 if (TARGET_PORTABLE_RUNTIME)
7858 /* Out of reach, can use ble. */
7863 output_indirect_call (rtx insn, rtx call_dest)
7869 xoperands[0] = call_dest;
7870 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7871 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7875 /* First the special case for kernels, level 0 systems, etc. */
7876 if (TARGET_FAST_INDIRECT_CALLS)
7877 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7879 /* Now the normal case -- we can reach $$dyncall directly or
7880 we're sure that we can get there via a long-branch stub.
7882 No need to check target flags as the length uniquely identifies
7883 the remaining cases. */
7884 if (attr_length_indirect_call (insn) == 8)
7886 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7887 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7888 variant of the B,L instruction can't be used on the SOM target. */
7889 if (TARGET_PA_20 && !TARGET_SOM)
7890 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7892 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7895 /* Long millicode call, but we are not generating PIC or portable runtime
7896 code. */
7897 if (attr_length_indirect_call (insn) == 12)
7898 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7900 /* Long millicode call for portable runtime. */
7901 if (attr_length_indirect_call (insn) == 20)
7902 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7904 /* We need a long PIC call to $$dyncall. */
7905 xoperands[0] = NULL_RTX;
7906 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7907 if (TARGET_SOM || !TARGET_GAS)
7909 xoperands[0] = gen_label_rtx ();
7910 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7911 targetm.asm_out.internal_label (asm_out_file, "L",
7912 CODE_LABEL_NUMBER (xoperands[0]));
7913 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7917 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7918 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7921 output_asm_insn ("blr %%r0,%%r2", xoperands);
7922 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7926 /* Return the total length of the save and restore instructions needed for
7927 the data linkage table pointer (i.e., the PIC register) across the call
7928 instruction INSN. No-return calls do not require a save and restore.
7929 In addition, we may be able to avoid the save and restore for calls
7930 within the same translation unit. */
7933 attr_length_save_restore_dltp (rtx insn)
7935 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7941 /* In HPUX 8.0's shared library scheme, special relocations are needed
7942 for function labels if they might be passed to a function
7943 in a shared library (because shared libraries don't live in code
7944 space), and special magic is needed to construct their address. */
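/* For example, the label "foo" is encoded below as "@foo";
   pa_strip_name_encoding reverses the transformation.  */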
7947 hppa_encode_label (rtx sym)
7949 const char *str = XSTR (sym, 0);
7950 int len = strlen (str) + 1;
7953 p = newstr = XALLOCAVEC (char, len + 1);
7957 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7961 pa_encode_section_info (tree decl, rtx rtl, int first)
7963 int old_referenced = 0;
7965 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7966 old_referenced
7967 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7969 default_encode_section_info (decl, rtl, first);
7971 if (first && TEXT_SPACE_P (decl))
7973 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7974 if (TREE_CODE (decl) == FUNCTION_DECL)
7975 hppa_encode_label (XEXP (rtl, 0));
7977 else if (old_referenced)
7978 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7981 /* This is sort of the inverse of pa_encode_section_info. */
7984 pa_strip_name_encoding (const char *str)
7986 str += (*str == '@');
7987 str += (*str == '*');
7992 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7994 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7997 /* Returns 1 if OP is a function label involved in a simple addition
7998 with a constant. Used to keep certain patterns from matching
7999 during instruction combination. */
8001 is_function_label_plus_const (rtx op)
8003 /* Strip off any CONST. */
8004 if (GET_CODE (op) == CONST)
8007 return (GET_CODE (op) == PLUS
8008 && function_label_operand (XEXP (op, 0), Pmode)
8009 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8012 /* Output assembly code for a thunk to FUNCTION. */
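/* Every variant below adjusts the incoming this pointer in %r26 by
   DELTA (a single ldo when DELTA fits in 14 bits, otherwise an
   addil/ldo pair) and then transfers control to FUNCTION.  */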
8015 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8016 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8019 static unsigned int current_thunk_number;
8020 int val_14 = VAL_14_BITS_P (delta);
8021 unsigned int old_last_address = last_address, nbytes = 0;
8025 xoperands[0] = XEXP (DECL_RTL (function), 0);
8026 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8027 xoperands[2] = GEN_INT (delta);
8029 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8030 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8032 /* Output the thunk. We know that the function is in the same
8033 translation unit (i.e., the same space) as the thunk, and that
8034 thunks are output after their method. Thus, we don't need an
8035 external branch to reach the function. With SOM and GAS,
8036 functions and thunks are effectively in different sections.
8037 Thus, we can always use an IA-relative branch and the linker
8038 will add a long branch stub if necessary.
8040 However, we have to be careful when generating PIC code on the
8041 SOM port to ensure that the sequence does not transfer to an
8042 import stub for the target function as this could clobber the
8043 return value saved at SP-24. This would also apply to the
8044 32-bit linux port if the multi-space model is implemented. */
8045 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8046 && !(flag_pic && TREE_PUBLIC (function))
8047 && (TARGET_GAS || last_address < 262132))
8048 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8049 && ((targetm.have_named_sections
8050 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8051 /* The GNU 64-bit linker has rather poor stub management.
8052 So, we use a long branch from thunks that aren't in
8053 the same section as the target function. */
8055 && (DECL_SECTION_NAME (thunk_fndecl)
8056 != DECL_SECTION_NAME (function)))
8057 || ((DECL_SECTION_NAME (thunk_fndecl)
8058 == DECL_SECTION_NAME (function))
8059 && last_address < 262132)))
8060 || (targetm.have_named_sections
8061 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8062 && DECL_SECTION_NAME (function) == NULL
8063 && last_address < 262132)
8064 || (!targetm.have_named_sections && last_address < 262132))))
8067 output_asm_insn ("addil L'%2,%%r26", xoperands);
8069 output_asm_insn ("b %0", xoperands);
8073 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8078 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8082 else if (TARGET_64BIT)
8084 /* We only have one call-clobbered scratch register, so we can't
8085 make use of the delay slot if delta doesn't fit in 14 bits. */
8088 output_asm_insn ("addil L'%2,%%r26", xoperands);
8089 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8092 output_asm_insn ("b,l .+8,%%r1", xoperands);
8096 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8097 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8101 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8102 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8107 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8108 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8113 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8117 else if (TARGET_PORTABLE_RUNTIME)
8119 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8120 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8123 output_asm_insn ("addil L'%2,%%r26", xoperands);
8125 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8129 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8134 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8138 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8140 /* The function is accessible from outside this module. The only
8141 way to avoid an import stub between the thunk and function is to
8142 call the function directly with an indirect sequence similar to
8143 that used by $$dyncall. This is possible because $$dyncall acts
8144 as the import stub in an indirect call. */
8145 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8146 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8147 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8148 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8149 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8150 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8151 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8152 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8153 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8157 output_asm_insn ("addil L'%2,%%r26", xoperands);
8163 output_asm_insn ("bve (%%r22)", xoperands);
8166 else if (TARGET_NO_SPACE_REGS)
8168 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8173 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8174 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8175 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8180 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8182 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8186 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8188 if (TARGET_SOM || !TARGET_GAS)
8190 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8191 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8195 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8196 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8200 output_asm_insn ("addil L'%2,%%r26", xoperands);
8202 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8206 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8211 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8218 output_asm_insn ("addil L'%2,%%r26", xoperands);
8220 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8221 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8225 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8230 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8235 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8237 if (TARGET_SOM && TARGET_GAS)
8239 /* We're done with this subspace except possibly for some additional
8240 debug information. Forget that we are in this subspace to ensure
8241 that the next function is output in its own subspace. */
8243 cfun->machine->in_nsubspa = 2;
8246 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8248 switch_to_section (data_section);
8249 output_asm_insn (".align 4", xoperands);
8250 ASM_OUTPUT_LABEL (file, label);
8251 output_asm_insn (".word P'%0", xoperands);
8254 current_thunk_number++;
8255 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8256 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8257 last_address += nbytes;
8258 if (old_last_address > last_address)
8259 last_address = UINT_MAX;
8260 update_total_code_bytes (nbytes);
8263 /* Only direct calls to static functions are allowed to be sibling (tail)
8264 call optimized.
8266 This restriction is necessary because some linker-generated stubs will
8267 store return pointers into rp' in some cases which might clobber a
8268 live value already in rp'.
8270 In a sibcall the current function and the target function share stack
8271 space. Thus if the path to the current function and the path to the
8272 target function save a value in rp', they save the value into the
8273 same stack slot, which has undesirable consequences.
8275 Because of the deferred binding nature of shared libraries any function
8276 with external scope could be in a different load module and thus require
8277 rp' to be saved when calling that function. So sibcall optimizations
8278 can only be safe for static functions.
8280 Note that GCC never needs return value relocations, so we don't have to
8281 worry about static calls with return value relocations (which require
8284 It is safe to perform a sibcall optimization when the target function
8285 will never return. */
8287 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8289 if (TARGET_PORTABLE_RUNTIME)
8292 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8293 single subspace mode and the call is not indirect. As far as I know,
8294 there is no operating system support for the multiple subspace mode.
8295 It might be possible to support indirect calls if we didn't use
8296 $$dyncall (see the indirect sequence generated in output_call). */
8298 return (decl != NULL_TREE);
8300 /* Sibcalls are not ok because the arg pointer register is not a fixed
8301 register. This prevents the sibcall optimization from occurring. In
8302 addition, there are problems with stub placement using GNU ld. This
8303 is because a normal sibcall branch uses a 17-bit relocation while
8304 a regular call branch uses a 22-bit relocation. As a result, more
8305 care needs to be taken in the placement of long-branch stubs. */
8309 /* Sibcalls are only ok within a translation unit. */
8310 return (decl && !TREE_PUBLIC (decl));
8313 /* ??? Addition is not commutative on the PA due to the weird implicit
8314 space register selection rules for memory addresses. Therefore, we
8315 don't consider a + b == b + a, as this might be inside a MEM. */
8317 pa_commutative_p (const_rtx x, int outer_code)
8319 return (COMMUTATIVE_P (x)
8320 && (TARGET_NO_SPACE_REGS
8321 || (outer_code != UNKNOWN && outer_code != MEM)
8322 || GET_CODE (x) != PLUS));
8325 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8326 use in fmpyadd instructions. */
8328 fmpyaddoperands (rtx *operands)
8330 enum machine_mode mode = GET_MODE (operands[0]);
8332 /* Must be a floating point mode. */
8333 if (mode != SFmode && mode != DFmode)
8336 /* All modes must be the same. */
8337 if (! (mode == GET_MODE (operands[1])
8338 && mode == GET_MODE (operands[2])
8339 && mode == GET_MODE (operands[3])
8340 && mode == GET_MODE (operands[4])
8341 && mode == GET_MODE (operands[5])))
8344 /* All operands must be registers. */
8345 if (! (GET_CODE (operands[1]) == REG
8346 && GET_CODE (operands[2]) == REG
8347 && GET_CODE (operands[3]) == REG
8348 && GET_CODE (operands[4]) == REG
8349 && GET_CODE (operands[5]) == REG))
8352 /* Only 2 real operands to the addition. One of the input operands must
8353 be the same as the output operand. */
8354 if (! rtx_equal_p (operands[3], operands[4])
8355 && ! rtx_equal_p (operands[3], operands[5]))
8358 /* Inout operand of add cannot conflict with any operands from multiply. */
8359 if (rtx_equal_p (operands[3], operands[0])
8360 || rtx_equal_p (operands[3], operands[1])
8361 || rtx_equal_p (operands[3], operands[2]))
8364 /* The multiply result cannot feed into the addition operands. */
8365 if (rtx_equal_p (operands[4], operands[0])
8366 || rtx_equal_p (operands[5], operands[0]))
8369 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8371 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8372 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8373 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8374 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8375 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8376 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8379 /* Passed. Operands are suitable for fmpyadd. */
8383 #if !defined(USE_COLLECT2)
8385 pa_asm_out_constructor (rtx symbol, int priority)
8387 if (!function_label_operand (symbol, VOIDmode))
8388 hppa_encode_label (symbol);
8390 #ifdef CTORS_SECTION_ASM_OP
8391 default_ctor_section_asm_out_constructor (symbol, priority);
8393 # ifdef TARGET_ASM_NAMED_SECTION
8394 default_named_section_asm_out_constructor (symbol, priority);
8396 default_stabs_asm_out_constructor (symbol, priority);
8402 pa_asm_out_destructor (rtx symbol, int priority)
8404 if (!function_label_operand (symbol, VOIDmode))
8405 hppa_encode_label (symbol);
8407 #ifdef DTORS_SECTION_ASM_OP
8408 default_dtor_section_asm_out_destructor (symbol, priority);
8410 # ifdef TARGET_ASM_NAMED_SECTION
8411 default_named_section_asm_out_destructor (symbol, priority);
8413 default_stabs_asm_out_destructor (symbol, priority);
8419 /* This function places uninitialized global data in the bss section.
8420 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8421 function on the SOM port to prevent uninitialized global data from
8422 being placed in the data section. */
8425 pa_asm_output_aligned_bss (FILE *stream,
8427 unsigned HOST_WIDE_INT size,
8430 switch_to_section (bss_section);
8431 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8433 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8434 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8437 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8438 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8441 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8442 ASM_OUTPUT_LABEL (stream, name);
8443 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8446 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8447 that doesn't allow the alignment of global common storage to be directly
8448 specified. The SOM linker aligns common storage based on the rounded
8449 value of the NUM_BYTES parameter in the .comm directive. It's not
8450 possible to use the .align directive as it doesn't affect the alignment
8451 of the label associated with a .comm directive. */
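/* As a hedged sketch with a hypothetical symbol "buf": a 10-byte
   object requesting 16-byte (128-bit) alignment would be emitted as

       buf	.comm 16

   since the size operand is rounded up to MAX (size, align /
   BITS_PER_UNIT), letting the SOM linker's rounding produce the
   desired alignment.  */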
8454 pa_asm_output_aligned_common (FILE *stream,
8456 unsigned HOST_WIDE_INT size,
8459 unsigned int max_common_align;
8461 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8462 if (align > max_common_align)
8464 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8465 "for global common data. Using %u",
8466 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8467 align = max_common_align;
8470 switch_to_section (bss_section);
8472 assemble_name (stream, name);
8473 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8474 MAX (size, align / BITS_PER_UNIT));
8477 /* We can't use .comm for local common storage as the SOM linker effectively
8478 treats the symbol as universal and uses the same storage for local symbols
8479 with the same name in different object files. The .block directive
8480 reserves an uninitialized block of storage. However, it's not common
8481 storage. Fortunately, GCC never requests common storage with the same
8482 name in any given translation unit. */
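/* A hedged sketch of the output for a hypothetical 20-byte local
   "buf" with 8-byte alignment (the exact label and LOCAL_ASM_OP
   forms depend on the target macros):

	.align 8
   buf
	.block 20  */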
8485 pa_asm_output_aligned_local (FILE *stream,
8487 unsigned HOST_WIDE_INT size,
8490 switch_to_section (bss_section);
8491 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8494 fprintf (stream, "%s", LOCAL_ASM_OP);
8495 assemble_name (stream, name);
8496 fprintf (stream, "\n");
8499 ASM_OUTPUT_LABEL (stream, name);
8500 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8503 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8504 use in fmpysub instructions. */
8506 fmpysuboperands (rtx *operands)
8508 enum machine_mode mode = GET_MODE (operands[0]);
8510 /* Must be a floating point mode. */
8511 if (mode != SFmode && mode != DFmode)
8514 /* All modes must be the same. */
8515 if (! (mode == GET_MODE (operands[1])
8516 && mode == GET_MODE (operands[2])
8517 && mode == GET_MODE (operands[3])
8518 && mode == GET_MODE (operands[4])
8519 && mode == GET_MODE (operands[5])))
8522 /* All operands must be registers. */
8523 if (! (GET_CODE (operands[1]) == REG
8524 && GET_CODE (operands[2]) == REG
8525 && GET_CODE (operands[3]) == REG
8526 && GET_CODE (operands[4]) == REG
8527 && GET_CODE (operands[5]) == REG))
8530 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8531 operation, so operands[4] must be the same as operands[3]. */
8532 if (! rtx_equal_p (operands[3], operands[4]))
8535 /* The multiply cannot feed into the subtraction. */
8536 if (rtx_equal_p (operands[5], operands[0]))
8539 /* Inout operand of sub cannot conflict with any operands from multiply. */
8540 if (rtx_equal_p (operands[3], operands[0])
8541 || rtx_equal_p (operands[3], operands[1])
8542 || rtx_equal_p (operands[3], operands[2]))
8545 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8547 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8548 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8549 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8550 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8551 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8552 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8555 /* Passed. Operands are suitable for fmpysub. */
8559 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8560 constants for shadd instructions. */
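/* (These are the scale factors of the sh1add, sh2add and sh3add
   instructions, which shift the index left by 1, 2 or 3 bits before
   adding.)  */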
8562 shadd_constant_p (int val)
8564 if (val == 2 || val == 4 || val == 8)
8570 /* Return 1 if OP is valid as a base or index register in a REG+REG address. */
8574 borx_reg_operand (rtx op, enum machine_mode mode)
8576 if (GET_CODE (op) != REG)
8579 /* We must reject virtual registers as the only expressions that
8580 can be instantiated are REG and REG+CONST. */
8581 if (op == virtual_incoming_args_rtx
8582 || op == virtual_stack_vars_rtx
8583 || op == virtual_stack_dynamic_rtx
8584 || op == virtual_outgoing_args_rtx
8585 || op == virtual_cfa_rtx)
8588 /* While it's always safe to index off the frame pointer, it's not
8589 profitable to do so when the frame pointer is being eliminated. */
8590 if (!reload_completed
8591 && flag_omit_frame_pointer
8592 && !cfun->calls_alloca
8593 && op == frame_pointer_rtx)
8596 return register_operand (op, mode);
8599 /* Return 1 if this operand is anything other than a hard register. */
8602 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8604 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8607 /* Return TRUE if INSN branches forward. */
8610 forward_branch_p (rtx insn)
8612 rtx lab = JUMP_LABEL (insn);
8614 /* The INSN must have a jump label. */
8615 gcc_assert (lab != NULL_RTX);
8617 if (INSN_ADDRESSES_SET_P ())
8618 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8625 insn = NEXT_INSN (insn);
8631 /* Return 1 if OP is an equality comparison, else return 0. */
8633 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8635 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8638 /* Return 1 if INSN is in the delay slot of a call instruction. */
8640 jump_in_call_delay (rtx insn)
8643 if (GET_CODE (insn) != JUMP_INSN)
8646 if (PREV_INSN (insn)
8647 && PREV_INSN (PREV_INSN (insn))
8648 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8650 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8652 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8653 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8660 /* Output an unconditional move and branch insn. */
8663 output_parallel_movb (rtx *operands, rtx insn)
8665 int length = get_attr_length (insn);
8667 /* These are the cases in which we win. */
8669 return "mov%I1b,tr %1,%0,%2";
8671 /* None of the following cases win, but they don't lose either. */
8674 if (dbr_sequence_length () == 0)
8676 /* Nothing in the delay slot, fake it by putting the combined
8677 insn (the copy or add) in the delay slot of a bl. */
8678 if (GET_CODE (operands[1]) == CONST_INT)
8679 return "b %2\n\tldi %1,%0";
8681 return "b %2\n\tcopy %1,%0";
8685 /* Something in the delay slot, but we've got a long branch. */
8686 if (GET_CODE (operands[1]) == CONST_INT)
8687 return "ldi %1,%0\n\tb %2";
8689 return "copy %1,%0\n\tb %2";
8693 if (GET_CODE (operands[1]) == CONST_INT)
8694 output_asm_insn ("ldi %1,%0", operands);
8696 output_asm_insn ("copy %1,%0", operands);
8697 return output_lbranch (operands[2], insn, 1);
8700 /* Output an unconditional add and branch insn. */
8703 output_parallel_addb (rtx *operands, rtx insn)
8705 int length = get_attr_length (insn);
8707 /* To make life easy we want operand0 to be the shared input/output
8708 operand and operand1 to be the readonly operand. */
8709 if (operands[0] == operands[1])
8710 operands[1] = operands[2];
8712 /* These are the cases in which we win. */
8714 return "add%I1b,tr %1,%0,%3";
8716 /* None of the following cases win, but they don't lose either. */
8719 if (dbr_sequence_length () == 0)
8720 /* Nothing in the delay slot, fake it by putting the combined
8721 insn (the copy or add) in the delay slot of a bl. */
8722 return "b %3\n\tadd%I1 %1,%0,%0";
8724 /* Something in the delay slot, but we've got a long branch. */
8725 return "add%I1 %1,%0,%0\n\tb %3";
8728 output_asm_insn ("add%I1 %1,%0,%0", operands);
8729 return output_lbranch (operands[3], insn, 1);
8732 /* Return nonzero if INSN (a jump insn) immediately follows a call
8733 to a named function. This is used to avoid filling the delay slot
8734 of the jump since it can usually be eliminated by modifying RP in
8735 the delay slot of the call. */
8738 following_call (rtx insn)
8740 if (! TARGET_JUMP_IN_DELAY)
8743 /* Find the previous real insn, skipping NOTEs. */
8744 insn = PREV_INSN (insn);
8745 while (insn && GET_CODE (insn) == NOTE)
8746 insn = PREV_INSN (insn);
8748 /* Check for CALL_INSNs and millicode calls. */
8750 && ((GET_CODE (insn) == CALL_INSN
8751 && get_attr_type (insn) != TYPE_DYNCALL)
8752 || (GET_CODE (insn) == INSN
8753 && GET_CODE (PATTERN (insn)) != SEQUENCE
8754 && GET_CODE (PATTERN (insn)) != USE
8755 && GET_CODE (PATTERN (insn)) != CLOBBER
8756 && get_attr_type (insn) == TYPE_MILLI)))
8762 /* We use this hook to perform a PA specific optimization which is difficult
8763 to do in earlier passes.
8765 We want the delay slots of branches within jump tables to be filled.
8766 None of the compiler passes at the moment even has the notion that a
8767 PA jump table doesn't contain addresses, but instead contains actual instructions!
8770 Because we actually jump into the table, the addresses of each entry
8771 must stay constant in relation to the beginning of the table (which
8772 itself must stay constant relative to the instruction to jump into
8773 it). I don't believe we can guarantee earlier passes of the compiler
8774 will adhere to those rules.
8776 So, late in the compilation process we find all the jump tables, and
8777 expand them into real code -- e.g. each entry in the jump table vector
8778 will get an appropriate label followed by a jump to the final target.
8780 Reorg and the final jump pass can then optimize these branches and
8781 fill their delay slots. We end up with smaller, more efficient code.
8783 The jump instructions within the table are special; we must be able
8784 to identify them during assembly output (if the jumps don't get filled
8785 we need to emit a nop rather than nullifying the delay slot). We
8786 identify jumps in switch tables by using insns with the attribute
8787 type TYPE_BTABLE_BRANCH.
8789 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8790 insns. This serves two purposes, first it prevents jump.c from
8791 noticing that the last N entries in the table jump to the instruction
8792 immediately after the table and deleting the jumps. Second, those
8793 insns mark where we should emit .begin_brtab and .end_brtab directives
8794 when using GAS (allows for better link time optimizations). */
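/* As a hedged sketch, a two-entry branch table that was once

	.word L$3
	.word L$4

   is expanded into real code along the lines of

   L$100:	b L$3
		nop		; filled by reorg when possible
   L$101:	b L$4
		nop

   so every entry keeps a fixed size and an indexed jump into the
   table still lands on the right instruction.  */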
8801 remove_useless_addtr_insns (1);
8803 if (pa_cpu < PROCESSOR_8000)
8804 pa_combine_instructions ();
8807 /* This is fairly cheap, so always run it if optimizing. */
8808 if (optimize > 0 && !TARGET_BIG_SWITCH)
8810 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8811 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8813 rtx pattern, tmp, location, label;
8814 unsigned int length, i;
8816 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8817 if (GET_CODE (insn) != JUMP_INSN
8818 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8819 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8822 /* Emit marker for the beginning of the branch table. */
8823 emit_insn_before (gen_begin_brtab (), insn);
8825 pattern = PATTERN (insn);
8826 location = PREV_INSN (insn);
8827 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8829 for (i = 0; i < length; i++)
8831 /* Emit a label before each jump to keep jump.c from
8832 removing this code. */
8833 tmp = gen_label_rtx ();
8834 LABEL_NUSES (tmp) = 1;
8835 emit_label_after (tmp, location);
8836 location = NEXT_INSN (location);
8838 if (GET_CODE (pattern) == ADDR_VEC)
8839 label = XEXP (XVECEXP (pattern, 0, i), 0);
8841 label = XEXP (XVECEXP (pattern, 1, i), 0);
8843 tmp = gen_short_jump (label);
8845 /* Emit the jump itself. */
8846 tmp = emit_jump_insn_after (tmp, location);
8847 JUMP_LABEL (tmp) = label;
8848 LABEL_NUSES (label)++;
8849 location = NEXT_INSN (location);
8851 /* Emit a BARRIER after the jump. */
8852 emit_barrier_after (location);
8853 location = NEXT_INSN (location);
8856 /* Emit marker for the end of the branch table. */
8857 emit_insn_before (gen_end_brtab (), location);
8858 location = NEXT_INSN (location);
8859 emit_barrier_after (location);
8861 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8867 /* Still need brtab marker insns. FIXME: the presence of these
8868 markers disables output of the branch table to readonly memory,
8869 and any alignment directives that might be needed. Possibly,
8870 the begin_brtab insn should be output before the label for the
8871 table. This doesn't matter at the moment since the tables are
8872 always output in the text section. */
8873 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8875 /* Find an ADDR_VEC insn. */
8876 if (GET_CODE (insn) != JUMP_INSN
8877 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8878 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8881 /* Now generate markers for the beginning and end of the branch table. */
8883 emit_insn_before (gen_begin_brtab (), insn);
8884 emit_insn_after (gen_end_brtab (), insn);
8889 /* The PA has a number of odd instructions which can perform multiple
8890 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8891 it may be profitable to combine two instructions into one instruction
8892 with two outputs. It's not profitable on PA2.0 machines because the
8893 two outputs would take two slots in the reorder buffers.
8895 This routine finds instructions which can be combined and combines
8896 them. We only support some of the potential combinations, and we
8897 only try common ways to find suitable instructions.
8899 * addb can add two registers or a register and a small integer
8900 and jump to a nearby (+-8k) location. Normally the jump to the
8901 nearby location is conditional on the result of the add, but by
8902 using the "true" condition we can make the jump unconditional.
8903 Thus addb can perform two independent operations in one insn.
8905 * movb is similar to addb in that it can perform a reg->reg
8906 or small immediate->reg copy and jump to a nearby (+-8k) location.
8908 * fmpyadd and fmpysub can perform a FP multiply and either an
8909 FP add or FP sub if the operands of the multiply and add/sub are
8910 independent (there are other minor restrictions). Note both
8911 the fmpy and fadd/fsub can in theory move to better spots according
8912 to data dependencies, but for now we require the fmpy stay at a fixed location.
8915 * Many of the memory operations can perform pre & post updates
8916 of index registers. GCC's pre/post increment/decrement addressing
8917 is far too simple to take advantage of all the possibilities. This
8918 pass may not be suitable since those insns may not be independent.
8920 * comclr can compare two ints or an int and a register, nullify
8921 the following instruction and zero some other register. This
8922 is more difficult to use as it's harder to find an insn which
8923 will generate a comclr than finding something like an unconditional
8924 branch. (conditional moves & long branches create comclr insns).
8926 * Most arithmetic operations can conditionally skip the next
8927 instruction. They can be viewed as "perform this operation
8928 and conditionally jump to this nearby location" (where nearby
8929 is one insn away). These are difficult to use due to the
8930 branch length restrictions. */
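/* A hedged sketch of the addb case with hypothetical operands: an

       add %r4,%r5,%r5

   followed, within branch range, by an unconditional backward branch
   to L$2 can be collapsed into

       addb,tr %r4,%r5,L$2

   performing the addition and the always-taken branch in one insn.  */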
8933 pa_combine_instructions (void)
8935 rtx anchor, new_rtx;
8937 /* This can get expensive since the basic algorithm is on the
8938 order of O(n^2) (or worse). Only do it for -O2 or higher
8939 levels of optimization. */
8943 /* Walk down the list of insns looking for "anchor" insns which
8944 may be combined with "floating" insns. As the name implies,
8945 "anchor" instructions don't move, while "floating" insns may
8947 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8948 new_rtx = make_insn_raw (new_rtx);
8950 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8952 enum attr_pa_combine_type anchor_attr;
8953 enum attr_pa_combine_type floater_attr;
8955 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8956 Also ignore any special USE insns. */
8957 if ((GET_CODE (anchor) != INSN
8958 && GET_CODE (anchor) != JUMP_INSN
8959 && GET_CODE (anchor) != CALL_INSN)
8960 || GET_CODE (PATTERN (anchor)) == USE
8961 || GET_CODE (PATTERN (anchor)) == CLOBBER
8962 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8963 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8966 anchor_attr = get_attr_pa_combine_type (anchor);
8967 /* See if anchor is an insn suitable for combination. */
8968 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8969 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8970 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8971 && ! forward_branch_p (anchor)))
8975 for (floater = PREV_INSN (anchor);
8977 floater = PREV_INSN (floater))
8979 if (GET_CODE (floater) == NOTE
8980 || (GET_CODE (floater) == INSN
8981 && (GET_CODE (PATTERN (floater)) == USE
8982 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8985 /* Anything except a regular INSN will stop our search. */
8986 if (GET_CODE (floater) != INSN
8987 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8988 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8994 /* See if FLOATER is suitable for combination with the base insn. */
8996 floater_attr = get_attr_pa_combine_type (floater);
8997 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8998 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8999 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9000 && floater_attr == PA_COMBINE_TYPE_FMPY))
9002 /* If ANCHOR and FLOATER can be combined, then we're
9003 done with this pass. */
9004 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9005 SET_DEST (PATTERN (floater)),
9006 XEXP (SET_SRC (PATTERN (floater)), 0),
9007 XEXP (SET_SRC (PATTERN (floater)), 1)))
9011 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9012 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9014 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9016 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9017 SET_DEST (PATTERN (floater)),
9018 XEXP (SET_SRC (PATTERN (floater)), 0),
9019 XEXP (SET_SRC (PATTERN (floater)), 1)))
9024 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9025 SET_DEST (PATTERN (floater)),
9026 SET_SRC (PATTERN (floater)),
9027 SET_SRC (PATTERN (floater))))
9033 /* If we didn't find anything on the backwards scan try forwards. */
9035 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9036 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9038 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9040 if (GET_CODE (floater) == NOTE
9041 || (GET_CODE (floater) == INSN
9042 && (GET_CODE (PATTERN (floater)) == USE
9043 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9047 /* Anything except a regular INSN will stop our search. */
9048 if (GET_CODE (floater) != INSN
9049 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9050 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9056 /* See if FLOATER is suitable for combination with the base insn. */
9058 floater_attr = get_attr_pa_combine_type (floater);
9059 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9060 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9061 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9062 && floater_attr == PA_COMBINE_TYPE_FMPY))
9064 /* If ANCHOR and FLOATER can be combined, then we're
9065 done with this pass. */
9066 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9067 SET_DEST (PATTERN (floater)),
9068 XEXP (SET_SRC (PATTERN (floater)),
9070 XEXP (SET_SRC (PATTERN (floater)),
9077 /* FLOATER will be nonzero if we found a suitable floating
9078 insn for combination with ANCHOR. */
9080 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9081 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9083 /* Emit the new instruction and delete the old anchor. */
9084 emit_insn_before (gen_rtx_PARALLEL
9086 gen_rtvec (2, PATTERN (anchor),
9087 PATTERN (floater))),
9090 SET_INSN_DELETED (anchor);
9092 /* Emit a special USE insn for FLOATER, then delete
9093 the floating insn. */
9094 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9095 delete_insn (floater);
9100 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9103 /* Emit the new_jump instruction and delete the old anchor. */
9105 = emit_jump_insn_before (gen_rtx_PARALLEL
9107 gen_rtvec (2, PATTERN (anchor),
9108 PATTERN (floater))),
9111 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9112 SET_INSN_DELETED (anchor);
9114 /* Emit a special USE insn for FLOATER, then delete
9115 the floating insn. */
9116 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9117 delete_insn (floater);
9125 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9128 int insn_code_number;
9131 /* Create a PARALLEL with the patterns of ANCHOR and
9132 FLOATER, try to recognize it, then test constraints
9133 for the resulting pattern.
9135 If the pattern doesn't match or the constraints
9136 aren't met, keep searching for a suitable floater insn. */
9138 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9139 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9140 INSN_CODE (new_rtx) = -1;
9141 insn_code_number = recog_memoized (new_rtx);
9142 if (insn_code_number < 0
9143 || (extract_insn (new_rtx), ! constrain_operands (1)))
9157 /* There are up to three operands to consider: one
9158 output and two inputs.
9160 The output must not be used between FLOATER & ANCHOR
9161 exclusive. The inputs must not be set between
9162 FLOATER and ANCHOR exclusive. */
9164 if (reg_used_between_p (dest, start, end))
9167 if (reg_set_between_p (src1, start, end))
9170 if (reg_set_between_p (src2, start, end))
9173 /* If we get here, then everything is good. */
9177 /* Return nonzero if references for INSN are delayed.
9179 Millicode insns are actually function calls with some special
9180 constraints on arguments and register usage.
9182 Millicode calls always expect their arguments in the integer argument
9183 registers, and always return their result in %r29 (ret1). They
9184 are expected to clobber their arguments, %r1, %r29, and the return
9185 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9187 This function tells reorg that the references to arguments and
9188 millicode calls do not appear to happen until after the millicode call.
9189 This allows reorg to put insns which set the argument registers into the
9190 delay slot of the millicode call -- thus they act more like traditional CALL_INSNs.
9193 Note we cannot consider side effects of the insn to be delayed because
9194 the branch and link insn will clobber the return pointer. If we happened
9195 to use the return pointer in the delay slot of the call, then we lose.
9197 get_attr_type will try to recognize the given insn, so make sure to
9198 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns in particular. */
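/* A hedged sketch with a hypothetical millicode multiply: reorg may
   rewrite

       ldi 7,%r26		; millicode argument setup
       bl $$mulI,%r31		; millicode call
       nop

   as

       bl $$mulI,%r31
       ldi 7,%r26		; argument setup now in the delay slot

   precisely because the argument references are reported as delayed
   until after the call itself.  */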
9201 insn_refs_are_delayed (rtx insn)
9203 return ((GET_CODE (insn) == INSN
9204 && GET_CODE (PATTERN (insn)) != SEQUENCE
9205 && GET_CODE (PATTERN (insn)) != USE
9206 && GET_CODE (PATTERN (insn)) != CLOBBER
9207 && get_attr_type (insn) == TYPE_MILLI));
9210 /* Promote the return value, but not the arguments. */
9212 static enum machine_mode
9213 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9214 enum machine_mode mode,
9215 int *punsignedp ATTRIBUTE_UNUSED,
9216 const_tree fntype ATTRIBUTE_UNUSED,
9219 if (for_return == 0)
9221 return promote_mode (type, mode, punsignedp);
9224 /* On the HP-PA the value is found in register(s) 28(-29), unless
9225 the mode is SF or DF. Then the value is returned in fr4 (32).
9227 This must perform the same promotions as PROMOTE_MODE, else promoting
9228 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9230 Small structures must be returned in a PARALLEL on PA64 in order
9231 to match the HP Compiler ABI. */
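/* Hedged examples of the cases handled below: on TARGET_64BIT a
   hypothetical 12-byte struct comes back left justified in a PARALLEL
   of DImode registers 28 and 29; a plain double is returned in
   register 32 (fr4); and a 32-bit int is promoted to word_mode in
   register 28.  */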
9234 pa_function_value (const_tree valtype,
9235 const_tree func ATTRIBUTE_UNUSED,
9236 bool outgoing ATTRIBUTE_UNUSED)
9238 enum machine_mode valmode;
9240 if (AGGREGATE_TYPE_P (valtype)
9241 || TREE_CODE (valtype) == COMPLEX_TYPE
9242 || TREE_CODE (valtype) == VECTOR_TYPE)
9246 /* Aggregates with a size less than or equal to 128 bits are
9247 returned in GR 28(-29). They are left justified. The pad
9248 bits are undefined. Larger aggregates are returned in memory. */
9252 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9254 for (i = 0; i < ub; i++)
9256 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9257 gen_rtx_REG (DImode, 28 + i),
9262 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9264 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9266 /* Aggregates 5 to 8 bytes in size are returned in general
9267 registers r28-r29 in the same manner as other non
9268 floating-point objects. The data is right-justified and
9269 zero-extended to 64 bits. This is opposite to the normal
9270 justification used on big endian targets and requires
9271 special treatment. */
9272 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9273 gen_rtx_REG (DImode, 28), const0_rtx);
9274 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9278 if ((INTEGRAL_TYPE_P (valtype)
9279 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9280 || POINTER_TYPE_P (valtype))
9281 valmode = word_mode;
9283 valmode = TYPE_MODE (valtype);
9285 if (TREE_CODE (valtype) == REAL_TYPE
9286 && !AGGREGATE_TYPE_P (valtype)
9287 && TYPE_MODE (valtype) != TFmode
9288 && !TARGET_SOFT_FLOAT)
9289 return gen_rtx_REG (valmode, 32);
9291 return gen_rtx_REG (valmode, 28);
9294 /* Return the location of a parameter that is passed in a register or NULL
9295 if the parameter has any component that is passed in memory.
9297 This is new code and will be pushed into the net sources after testing.
9300 ??? We might want to restructure this so that it looks more like other ports. */
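/* A hedged worked example of the 64-bit register bases computed
   below: with cum->words == 2, gpr_reg_base = 26 - 2 = 24 and
   fpr_reg_base = 32 + 2 = 34, i.e. the third argument slot maps to
   the third GPR/FPR pair in this port's hard register numbering.  */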
9303 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9304 int named ATTRIBUTE_UNUSED)
9306 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9313 if (mode == VOIDmode)
9316 arg_size = FUNCTION_ARG_SIZE (mode, type);
9318 /* If this arg would be passed partially or totally on the stack, then
9319 this routine should return zero. pa_arg_partial_bytes will
9320 handle arguments which are split between regs and stack slots if
9321 the ABI mandates split arguments. */
9324 /* The 32-bit ABI does not split arguments. */
9325 if (cum->words + arg_size > max_arg_words)
9331 alignment = cum->words & 1;
9332 if (cum->words + alignment >= max_arg_words)
9336 /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9337 particularly in their handling of FP registers. We might
9338 be able to cleverly share code between them, but I'm not
9339 going to bother in the hope that splitting them up results
9340 in code that is more easily understood. */
9344 /* Advance the base registers to their current locations.
9346 Remember, gprs grow towards smaller register numbers while
9347 fprs grow to higher register numbers. Also remember that
9348 although FP regs are 32-bit addressable, we pretend that
9349 the registers are 64-bits wide. */
9350 gpr_reg_base = 26 - cum->words;
9351 fpr_reg_base = 32 + cum->words;
9353 /* Arguments wider than one word and small aggregates need special treatment. */
9357 || (type && (AGGREGATE_TYPE_P (type)
9358 || TREE_CODE (type) == COMPLEX_TYPE
9359 || TREE_CODE (type) == VECTOR_TYPE)))
9361 /* Double-extended precision (80-bit), quad-precision (128-bit)
9362 and aggregates including complex numbers are aligned on
9363 128-bit boundaries. The first eight 64-bit argument slots
9364 are associated one-to-one, with general registers r26
9365 through r19, and also with floating-point registers fr4
9366 through fr11. Arguments larger than one word are always
9367 passed in general registers.
9369 Using a PARALLEL with a word mode register results in left
9370 justified data on a big-endian target. */
9373 int i, offset = 0, ub = arg_size;
9375 /* Align the base register. */
9376 gpr_reg_base -= alignment;
9378 ub = MIN (ub, max_arg_words - cum->words - alignment);
9379 for (i = 0; i < ub; i++)
9381 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9382 gen_rtx_REG (DImode, gpr_reg_base),
9388 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9393 /* If the argument is larger than a word, then we know precisely
9394 which registers we must use. */
9408 /* Structures 5 to 8 bytes in size are passed in the general
9409 registers in the same manner as other non floating-point
9410 objects. The data is right-justified and zero-extended
9411 to 64 bits. This is opposite to the normal justification
9412 used on big endian targets and requires special treatment.
9413 We now define BLOCK_REG_PADDING to pad these objects.
9414 Aggregates, complex and vector types are passed in the same
9415 manner as structures. */
9417 || (type && (AGGREGATE_TYPE_P (type)
9418 || TREE_CODE (type) == COMPLEX_TYPE
9419 || TREE_CODE (type) == VECTOR_TYPE)))
9421 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9422 gen_rtx_REG (DImode, gpr_reg_base),
9424 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9429 /* We have a single word (32 bits). A simple computation
9430 will get us the register #s we need. */
9431 gpr_reg_base = 26 - cum->words;
9432 fpr_reg_base = 32 + 2 * cum->words;
9436 /* Determine if the argument needs to be passed in both general and
9437 floating point registers. */
9438 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9439 /* If we are doing soft-float with portable runtime, then there
9440 is no need to worry about FP regs. */
9441 && !TARGET_SOFT_FLOAT
9442 /* The parameter must be some kind of scalar float, else we just
9443 pass it in integer registers. */
9444 && GET_MODE_CLASS (mode) == MODE_FLOAT
9445 /* The target function must not have a prototype. */
9446 && cum->nargs_prototype <= 0
9447 /* libcalls do not need to pass items in both FP and general registers. */
9449 && type != NULL_TREE
9450 /* All this hair applies to "outgoing" args only. This includes
9451 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
9453 /* Also pass outgoing floating arguments in both registers in indirect
9454 calls with the 32-bit ABI and the HP assembler since there is no
9455 way to specify argument locations in static functions.
9460 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9466 gen_rtx_EXPR_LIST (VOIDmode,
9467 gen_rtx_REG (mode, fpr_reg_base),
9469 gen_rtx_EXPR_LIST (VOIDmode,
9470 gen_rtx_REG (mode, gpr_reg_base),
9475 /* See if we should pass this parameter in a general register. */
9476 if (TARGET_SOFT_FLOAT
9477 /* Indirect calls in the normal 32-bit ABI require all arguments
9478 to be passed in general registers. */
9479 || (!TARGET_PORTABLE_RUNTIME
9483 /* If the parameter is not a scalar floating-point parameter,
9484 then it belongs in GPRs. */
9485 || GET_MODE_CLASS (mode) != MODE_FLOAT
9486 /* Structure with single SFmode field belongs in GPR. */
9487 || (type && AGGREGATE_TYPE_P (type)))
9488 retval = gen_rtx_REG (mode, gpr_reg_base);
9490 retval = gen_rtx_REG (mode, fpr_reg_base);
9496 /* If this arg would be passed totally in registers or totally on the stack,
9497 then this routine should return zero. */
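/* A hedged worked example: on the 64-bit port with cum->words == 6,
   no padding, and a 3-word argument, the argument straddles the
   register/stack boundary, so the code below returns
   (8 - 6 - 0) * UNITS_PER_WORD = 16 -- two words in registers and one
   on the stack.  */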
9500 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9501 tree type, bool named ATTRIBUTE_UNUSED)
9503 unsigned int max_arg_words = 8;
9504 unsigned int offset = 0;
9509 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9512 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9513 /* Arg fits fully into registers. */
9515 else if (cum->words + offset >= max_arg_words)
9516 /* Arg fully on the stack. */
9520 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9524 /* A get_unnamed_section callback for switching to the text section.
9526 This function is only used with SOM. Because we don't support
9527 named subspaces, we can only create a new subspace or switch back
9528 to the default text subspace. */
9531 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9533 gcc_assert (TARGET_SOM);
9536 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9538 /* We only want to emit a .nsubspa directive once at the
9539 start of the function. */
9540 cfun->machine->in_nsubspa = 1;
9542 /* Create a new subspace for the text. This provides
9543 better stub placement and one-only functions. */
9545 && DECL_ONE_ONLY (cfun->decl)
9546 && !DECL_WEAK (cfun->decl))
9548 output_section_asm_op ("\t.SPACE $TEXT$\n"
9549 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9550 "ACCESS=44,SORT=24,COMDAT");
9556 /* There isn't a current function or the body of the current
9557 function has been completed. So, we are changing to the
9558 text section to output debugging information. Thus, we
9559 need to forget that we are in the text section so that
9560 varasm.c will call us when text_section is selected again. */
9561 gcc_assert (!cfun || !cfun->machine
9562 || cfun->machine->in_nsubspa == 2);
9565 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9568 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9571 /* A get_unnamed_section callback for switching to comdat data
9572 sections. This function is only used with SOM. */
9575 som_output_comdat_data_section_asm_op (const void *data)
9578 output_section_asm_op (data);
9581 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9584 pa_som_asm_init_sections (void)
9587 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9589 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9590 is not being generated. */
9591 som_readonly_data_section
9592 = get_unnamed_section (0, output_section_asm_op,
9593 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9595 /* When secondary definitions are not supported, SOM makes readonly
9596 data one-only by creating a new $LIT$ subspace in $TEXT$ with the comdat flag. */
9598 som_one_only_readonly_data_section
9599 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9601 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9602 "ACCESS=0x2c,SORT=16,COMDAT");
9605 /* When secondary definitions are not supported, SOM makes data one-only
9606 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9607 som_one_only_data_section
9608 = get_unnamed_section (SECTION_WRITE,
9609 som_output_comdat_data_section_asm_op,
9610 "\t.SPACE $PRIVATE$\n"
9611 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9612 "ACCESS=31,SORT=24,COMDAT");
9614 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9615 which reference data within the $TEXT$ space (for example constant
9616 strings in the $LIT$ subspace).
9618 The assemblers (GAS and HP as) both have problems with handling
9619 the difference of two symbols which is the other correct way to
9620 reference constant data during PIC code generation.
9622 So, there's no way to reference constant data which is in the
9623 $TEXT$ space during PIC generation. Instead place all constant
9624 data into the $PRIVATE$ subspace (this reduces sharing, but it
9625 works correctly). */
9626 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9628 /* We must not have a reference to an external symbol defined in a
9629 shared library in a readonly section, else the SOM linker will complain.
9632 So, we force exception information into the data section. */
9633 exception_section = data_section;
9636 /* On hpux10, the linker will give an error if we have a reference
9637 in the read-only data section to a symbol defined in a shared
9638 library. Therefore, expressions that might require a reloc can
9639 not be placed in the read-only data section. */
9642 pa_select_section (tree exp, int reloc,
9643 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9645 if (TREE_CODE (exp) == VAR_DECL
9646 && TREE_READONLY (exp)
9647 && !TREE_THIS_VOLATILE (exp)
9648 && DECL_INITIAL (exp)
9649 && (DECL_INITIAL (exp) == error_mark_node
9650 || TREE_CONSTANT (DECL_INITIAL (exp)))
9654 && DECL_ONE_ONLY (exp)
9655 && !DECL_WEAK (exp))
9656 return som_one_only_readonly_data_section;
9658 return readonly_data_section;
9660 else if (CONSTANT_CLASS_P (exp) && !reloc)
9661 return readonly_data_section;
9663 && TREE_CODE (exp) == VAR_DECL
9664 && DECL_ONE_ONLY (exp)
9665 && !DECL_WEAK (exp))
9666 return som_one_only_data_section;
9668 return data_section;
9672 pa_globalize_label (FILE *stream, const char *name)
9674 /* We only handle DATA objects here, functions are globalized in
9675 ASM_DECLARE_FUNCTION_NAME. */
9676 if (! FUNCTION_NAME_P (name))
9678 fputs ("\t.EXPORT ", stream);
9679 assemble_name (stream, name);
9680 fputs (",DATA\n", stream);
9684 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9687 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9688 int incoming ATTRIBUTE_UNUSED)
9690 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9693 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9696 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9698 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9699 PA64 ABI says that objects larger than 128 bits are returned in memory.
9700 Note, int_size_in_bytes can return -1 if the size of the object is
9701 variable or larger than the maximum value that can be expressed as
9702 a HOST_WIDE_INT. It can also return zero for an empty type. The
9703 simplest way to handle variable and empty types is to pass them in
9704 memory. This avoids problems in defining the boundaries of argument
9705 slots, allocating registers, etc. */
9706 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9707 || int_size_in_bytes (type) <= 0);
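/* Worked instances of the test above: a hypothetical 12-byte struct
   is returned in memory on the 32-bit SOM port (12 > 8) but in
   registers on PA64 (12 <= 16); zero-sized and variable-sized types
   always go in memory.  */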
9710 /* Structure to hold declaration and name of external symbols that are
9711 emitted by GCC. We generate a vector of these symbols and output them
9712 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9713 This avoids putting out names that are never really used. */
9715 typedef struct GTY(()) extern_symbol
9721 /* Define gc'd vector type for extern_symbol. */
9722 DEF_VEC_O(extern_symbol);
9723 DEF_VEC_ALLOC_O(extern_symbol,gc);
9725 /* Vector of extern_symbol pointers. */
9726 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9728 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9729 /* Mark DECL (name NAME) as an external reference (assembler output
9730 file FILE). This saves the names to output at the end of the file
9731 if actually referenced. */
9734 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9736 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9738 gcc_assert (file == asm_out_file);
9743 /* Output text required at the end of an assembler file.
9744 This includes deferred plabels and .import directives for
9745 all external symbols that were actually referenced. */
9748 pa_hpux_file_end (void)
9753 if (!NO_DEFERRED_PROFILE_COUNTERS)
9754 output_deferred_profile_counters ();
9756 output_deferred_plabels ();
9758 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9760 tree decl = p->decl;
9762 if (!TREE_ASM_WRITTEN (decl)
9763 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9764 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9767 VEC_free (extern_symbol, gc, extern_symbols);
9771 /* Return true if a change from mode FROM to mode TO for a register
9772 in register class RCLASS is invalid. */
9775 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9776 enum reg_class rclass)
9781 /* Reject changes to/from complex and vector modes. */
9782 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9783 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9786 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9789 /* There is no way to load QImode or HImode values directly from
9790 memory. SImode loads to the FP registers are not zero extended.
9791 On the 64-bit target, this conflicts with the definition of
9792 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9793 with different sizes in the floating-point registers. */
9794 if (MAYBE_FP_REG_CLASS_P (rclass))
9797 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9798 in specific sets of registers. Thus, we cannot allow changing
9799 to a larger mode when it's larger than a word. */
9800 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9801 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9807 /* Returns TRUE if it is a good idea to tie two pseudo registers
9808 when one has mode MODE1 and one has mode MODE2.
9809 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9810 for any hard reg, then this must be FALSE for correct output.
9812 We should return FALSE for QImode and HImode because these modes
9813 are not ok in the floating-point registers. However, this prevents
9814 tying these modes to SImode and DImode in the general registers.
9815 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9816 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9817 in the floating-point registers. */
9820 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9822 /* Don't tie modes in different classes. */
9823 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9830 /* Length in units of the trampoline instruction code. */
9832 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9835 /* Output assembler code for a block containing the constant parts
9836 of a trampoline, leaving space for the variable parts.
9838 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9839 and then branches to the specified routine.
9841 This code template is copied from the text segment to a stack location
9842 and then patched by pa_trampoline_init to contain valid values,
9843 before being entered as a subroutine.
9845 It is best to keep this as small as possible to avoid having to
9846 flush multiple lines in the cache. */
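/* A hedged sketch of the 32-bit layout: the code template above is
   followed by data words that pa_trampoline_init patches at run
   time --

       offset 36	function address
       offset 40	static chain value
       offset 44	trampoline address  (plabel word 1)
       offset 48	%r19 value          (plabel word 2)

   -- which is why the template loads through 36(%r22) and 40(%r22).  */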
9849 pa_asm_trampoline_template (FILE *f)
9853 fputs ("\tldw 36(%r22),%r21\n", f);
9854 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9855 if (ASSEMBLER_DIALECT == 0)
9856 fputs ("\tdepi 0,31,2,%r21\n", f);
9858 fputs ("\tdepwi 0,31,2,%r21\n", f);
9859 fputs ("\tldw 4(%r21),%r19\n", f);
9860 fputs ("\tldw 0(%r21),%r21\n", f);
9863 fputs ("\tbve (%r21)\n", f);
9864 fputs ("\tldw 40(%r22),%r29\n", f);
9865 fputs ("\t.word 0\n", f);
9866 fputs ("\t.word 0\n", f);
9870 fputs ("\tldsid (%r21),%r1\n", f);
9871 fputs ("\tmtsp %r1,%sr0\n", f);
9872 fputs ("\tbe 0(%sr0,%r21)\n", f);
9873 fputs ("\tldw 40(%r22),%r29\n", f);
9875 fputs ("\t.word 0\n", f);
9876 fputs ("\t.word 0\n", f);
9877 fputs ("\t.word 0\n", f);
9878 fputs ("\t.word 0\n", f);
9882 fputs ("\t.dword 0\n", f);
9883 fputs ("\t.dword 0\n", f);
9884 fputs ("\t.dword 0\n", f);
9885 fputs ("\t.dword 0\n", f);
9886 fputs ("\tmfia %r31\n", f);
9887 fputs ("\tldd 24(%r31),%r1\n", f);
9888 fputs ("\tldd 24(%r1),%r27\n", f);
9889 fputs ("\tldd 16(%r1),%r1\n", f);
9890 fputs ("\tbve (%r1)\n", f);
9891 fputs ("\tldd 32(%r31),%r31\n", f);
9892 fputs ("\t.dword 0 ; fptr\n", f);
9893 fputs ("\t.dword 0 ; static link\n", f);
9897 /* Emit RTL insns to initialize the variable parts of a trampoline.
9898 FNADDR is an RTX for the address of the function's pure code.
9899 CXT is an RTX for the static chain value for the function.
9901 Move the function address to the trampoline template at offset 36.
9902 Move the static chain value to trampoline template at offset 40.
9903 Move the trampoline address to trampoline template at offset 44.
9904 Move r19 to trampoline template at offset 48. The latter two
9905 words create a plabel for the indirect call to the trampoline.
9907 A similar sequence is used for the 64-bit port but the plabel is
9908 at the beginning of the trampoline.
9910 Finally, the cache entries for the trampoline code are flushed.
9911 This is necessary to ensure that the trampoline instruction sequence
9912 is written to memory prior to any attempts at prefetching the code sequence. */
9916 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9918 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9919 rtx start_addr = gen_reg_rtx (Pmode);
9920 rtx end_addr = gen_reg_rtx (Pmode);
9921 rtx line_length = gen_reg_rtx (Pmode);
9924 emit_block_move (m_tramp, assemble_trampoline_template (),
9925 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
9926 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
9930 tmp = adjust_address (m_tramp, Pmode, 36);
9931 emit_move_insn (tmp, fnaddr);
9932 tmp = adjust_address (m_tramp, Pmode, 40);
9933 emit_move_insn (tmp, chain_value);
9935 /* Create a fat pointer for the trampoline. */
9936 tmp = adjust_address (m_tramp, Pmode, 44);
9937 emit_move_insn (tmp, r_tramp);
9938 tmp = adjust_address (m_tramp, Pmode, 48);
9939 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
9941 /* fdc and fic only use registers for the address to flush,
9942 they do not accept integer displacements. We align the
9943 start and end addresses to the beginning of their respective
9944 cache lines to minimize the number of lines flushed. */
9945 emit_insn (gen_andsi3 (start_addr, r_tramp,
9946 GEN_INT (-MIN_CACHELINE_SIZE)));
9947 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
9948 emit_insn (gen_andsi3 (end_addr, tmp,
9949 GEN_INT (-MIN_CACHELINE_SIZE)));
9950 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
9951 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
9952 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
9953 gen_reg_rtx (Pmode),
9954 gen_reg_rtx (Pmode)));
9958 tmp = adjust_address (m_tramp, Pmode, 56);
9959 emit_move_insn (tmp, fnaddr);
9960 tmp = adjust_address (m_tramp, Pmode, 64);
9961 emit_move_insn (tmp, chain_value);
9963 /* Create a fat pointer for the trampoline. */
9964 tmp = adjust_address (m_tramp, Pmode, 16);
9965 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
9966 tmp = adjust_address (m_tramp, Pmode, 24);
9967 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
9969 /* fdc and fic only use registers for the address to flush,
9970 they do not accept integer displacements. We align the
9971 start and end addresses to the beginning of their respective
9972 cache lines to minimize the number of lines flushed. */
9973 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
9974 emit_insn (gen_anddi3 (start_addr, tmp,
9975 GEN_INT (-MIN_CACHELINE_SIZE)));
9976 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
9977 emit_insn (gen_anddi3 (end_addr, tmp,
9978 GEN_INT (-MIN_CACHELINE_SIZE)));
9979 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
9980 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
9981 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
9982 gen_reg_rtx (Pmode),
9983 gen_reg_rtx (Pmode)));
9987 /* Perform any machine-specific adjustment in the address of the trampoline.
9988 ADDR contains the address that was passed to pa_trampoline_init.
9989 Adjust the trampoline address to point to the plabel at offset 44. The extra 2 in the constant below (44 + 2 = 46) sets the low-order flag bit that marks the address as a plabel for indirect call sequences. */
9992 pa_trampoline_adjust_address (rtx addr)
9995 addr = memory_address (Pmode, plus_constant (addr, 46));
10000 pa_delegitimize_address (rtx orig_x)
10002 rtx x = delegitimize_mem_from_attrs (orig_x);
10004 if (GET_CODE (x) == LO_SUM
10005 && GET_CODE (XEXP (x, 1)) == UNSPEC
10006 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10007 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));