1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69 set = single_set (out_insn);
73 other_mode = GET_MODE (SET_SRC (set));
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
83 #define DO_FRAME_NOTES 0
87 static void copy_reg_pointer (rtx, rtx);
88 static void fix_range (const char *);
89 static bool pa_handle_option (size_t, const char *, int);
90 static int hppa_address_cost (rtx, bool);
91 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
92 static inline rtx force_mode (enum machine_mode, rtx);
93 static void pa_reorg (void);
94 static void pa_combine_instructions (void);
95 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
96 static bool forward_branch_p (rtx);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static rtx pa_function_value (const_tree, const_tree, bool);
107 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
108 static void update_total_code_bytes (unsigned int);
109 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
110 static int pa_adjust_cost (rtx, rtx, rtx, int);
111 static int pa_adjust_priority (rtx, int);
112 static int pa_issue_rate (void);
113 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
114 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
116 static void pa_encode_section_info (tree, rtx, int);
117 static const char *pa_strip_name_encoding (const char *);
118 static bool pa_function_ok_for_sibcall (tree, tree);
119 static void pa_globalize_label (FILE *, const char *)
121 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
122 HOST_WIDE_INT, tree);
123 #if !defined(USE_COLLECT2)
124 static void pa_asm_out_constructor (rtx, int);
125 static void pa_asm_out_destructor (rtx, int);
127 static void pa_init_builtins (void);
128 static rtx hppa_builtin_saveregs (void);
129 static void hppa_va_start (tree, rtx);
130 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
131 static bool pa_scalar_mode_supported_p (enum machine_mode);
132 static bool pa_commutative_p (const_rtx x, int outer_code);
133 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
134 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
135 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
136 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
137 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
138 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
139 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
140 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
142 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
144 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
145 static void output_deferred_plabels (void);
146 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
147 #ifdef ASM_OUTPUT_EXTERNAL_REAL
148 static void pa_hpux_file_end (void);
150 #ifdef HPUX_LONG_DOUBLE_LIBRARY
151 static void pa_hpux_init_libfuncs (void);
153 static rtx pa_struct_value_rtx (tree, int);
154 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
156 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
158 static struct machine_function * pa_init_machine_status (void);
159 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
161 secondary_reload_info *);
162 static void pa_extra_live_on_entry (bitmap);
163 static enum machine_mode pa_promote_function_mode (const_tree,
164 enum machine_mode, int *,
167 static void pa_asm_trampoline_template (FILE *);
168 static void pa_trampoline_init (rtx, tree, rtx);
169 static rtx pa_trampoline_adjust_address (rtx);
171 /* The following extra sections are only used for SOM. */
172 static GTY(()) section *som_readonly_data_section;
173 static GTY(()) section *som_one_only_readonly_data_section;
174 static GTY(()) section *som_one_only_data_section;
176 /* Which cpu we are scheduling for. */
177 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
179 /* The UNIX standard to use for predefines and linking. */
180 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
182 /* Counts for the number of callee-saved general and floating point
183 registers which were saved by the current function's prologue. */
184 static int gr_saved, fr_saved;
186 /* Boolean indicating whether the return pointer was saved by the
187 current function's prologue. */
188 static bool rp_saved;
190 static rtx find_addr_reg (rtx);
192 /* Keep track of the number of bytes we have output in the CODE subspace
193 during this compilation so we'll know when to emit inline long-calls. */
194 unsigned long total_code_bytes;
196 /* The last address of the previous function plus the number of bytes in
197 associated thunks that have been output. This is used to determine if
198 a thunk can use an IA-relative branch to reach its target function. */
199 static unsigned int last_address;
201 /* Variables to handle plabels that we discover are necessary at assembly
202 output time. They are output after the current function. */
203 struct GTY(()) deferred_plabel
208 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
210 static size_t n_deferred_plabels = 0;
213 /* Initialize the GCC target structure. */
215 #undef TARGET_ASM_ALIGNED_HI_OP
216 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
217 #undef TARGET_ASM_ALIGNED_SI_OP
218 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
219 #undef TARGET_ASM_ALIGNED_DI_OP
220 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
221 #undef TARGET_ASM_UNALIGNED_HI_OP
222 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
223 #undef TARGET_ASM_UNALIGNED_SI_OP
224 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
225 #undef TARGET_ASM_UNALIGNED_DI_OP
226 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
227 #undef TARGET_ASM_INTEGER
228 #define TARGET_ASM_INTEGER pa_assemble_integer
230 #undef TARGET_ASM_FUNCTION_PROLOGUE
231 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
232 #undef TARGET_ASM_FUNCTION_EPILOGUE
233 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
235 #undef TARGET_FUNCTION_VALUE
236 #define TARGET_FUNCTION_VALUE pa_function_value
238 #undef TARGET_LEGITIMIZE_ADDRESS
239 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
241 #undef TARGET_SCHED_ADJUST_COST
242 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
243 #undef TARGET_SCHED_ADJUST_PRIORITY
244 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
245 #undef TARGET_SCHED_ISSUE_RATE
246 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
248 #undef TARGET_ENCODE_SECTION_INFO
249 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
250 #undef TARGET_STRIP_NAME_ENCODING
251 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
253 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
254 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
256 #undef TARGET_COMMUTATIVE_P
257 #define TARGET_COMMUTATIVE_P pa_commutative_p
259 #undef TARGET_ASM_OUTPUT_MI_THUNK
260 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
261 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
262 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
264 #undef TARGET_ASM_FILE_END
265 #ifdef ASM_OUTPUT_EXTERNAL_REAL
266 #define TARGET_ASM_FILE_END pa_hpux_file_end
268 #define TARGET_ASM_FILE_END output_deferred_plabels
271 #if !defined(USE_COLLECT2)
272 #undef TARGET_ASM_CONSTRUCTOR
273 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
274 #undef TARGET_ASM_DESTRUCTOR
275 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
278 #undef TARGET_DEFAULT_TARGET_FLAGS
279 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
280 #undef TARGET_HANDLE_OPTION
281 #define TARGET_HANDLE_OPTION pa_handle_option
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS pa_init_builtins
286 #undef TARGET_RTX_COSTS
287 #define TARGET_RTX_COSTS hppa_rtx_costs
288 #undef TARGET_ADDRESS_COST
289 #define TARGET_ADDRESS_COST hppa_address_cost
291 #undef TARGET_MACHINE_DEPENDENT_REORG
292 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
294 #ifdef HPUX_LONG_DOUBLE_LIBRARY
295 #undef TARGET_INIT_LIBFUNCS
296 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
299 #undef TARGET_PROMOTE_FUNCTION_MODE
300 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
301 #undef TARGET_PROMOTE_PROTOTYPES
302 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
304 #undef TARGET_STRUCT_VALUE_RTX
305 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
306 #undef TARGET_RETURN_IN_MEMORY
307 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
308 #undef TARGET_MUST_PASS_IN_STACK
309 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
310 #undef TARGET_PASS_BY_REFERENCE
311 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
312 #undef TARGET_CALLEE_COPIES
313 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
314 #undef TARGET_ARG_PARTIAL_BYTES
315 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
317 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
318 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
319 #undef TARGET_EXPAND_BUILTIN_VA_START
320 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
321 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
322 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
324 #undef TARGET_SCALAR_MODE_SUPPORTED_P
325 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
327 #undef TARGET_CANNOT_FORCE_CONST_MEM
328 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
330 #undef TARGET_SECONDARY_RELOAD
331 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
333 #undef TARGET_EXTRA_LIVE_ON_ENTRY
334 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
336 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
337 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
338 #undef TARGET_TRAMPOLINE_INIT
339 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
340 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
341 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
343 struct gcc_target targetm = TARGET_INITIALIZER;
345 /* Parse the -mfixed-range= option string. */
348 fix_range (const char *const_str)
351 char *str, *dash, *comma;
353 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
354 REG2 are either register names or register numbers. The effect
355 of this option is to mark the registers in the range from REG1 to
356 REG2 as ``fixed'' so they won't be used by the compiler. This is
357 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
359 i = strlen (const_str);
360 str = (char *) alloca (i + 1);
361 memcpy (str, const_str, i + 1);
365 dash = strchr (str, '-');
368 warning (0, "value of -mfixed-range must have form REG1-REG2");
373 comma = strchr (dash + 1, ',');
377 first = decode_reg_name (str);
380 warning (0, "unknown register name: %s", str);
384 last = decode_reg_name (dash + 1);
387 warning (0, "unknown register name: %s", dash + 1);
395 warning (0, "%s-%s is an empty range", str, dash + 1);
399 for (i = first; i <= last; ++i)
400 fixed_regs[i] = call_used_regs[i] = 1;
409 /* Check if all floating point registers have been fixed. */
410 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
415 target_flags |= MASK_DISABLE_FPREGS;
418 /* Implement TARGET_HANDLE_OPTION. */
421 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
426 case OPT_mpa_risc_1_0:
428 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
432 case OPT_mpa_risc_1_1:
434 target_flags &= ~MASK_PA_20;
435 target_flags |= MASK_PA_11;
438 case OPT_mpa_risc_2_0:
440 target_flags |= MASK_PA_11 | MASK_PA_20;
444 if (strcmp (arg, "8000") == 0)
445 pa_cpu = PROCESSOR_8000;
446 else if (strcmp (arg, "7100") == 0)
447 pa_cpu = PROCESSOR_7100;
448 else if (strcmp (arg, "700") == 0)
449 pa_cpu = PROCESSOR_700;
450 else if (strcmp (arg, "7100LC") == 0)
451 pa_cpu = PROCESSOR_7100LC;
452 else if (strcmp (arg, "7200") == 0)
453 pa_cpu = PROCESSOR_7200;
454 else if (strcmp (arg, "7300") == 0)
455 pa_cpu = PROCESSOR_7300;
460 case OPT_mfixed_range_:
470 #if TARGET_HPUX_10_10
476 #if TARGET_HPUX_11_11
488 override_options (void)
490 /* Unconditional branches in the delay slot are not compatible with dwarf2
491 call frame information. There is no benefit in using this optimization
492 on PA8000 and later processors. */
493 if (pa_cpu >= PROCESSOR_8000
494 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
495 || flag_unwind_tables)
496 target_flags &= ~MASK_JUMP_IN_DELAY;
498 if (flag_pic && TARGET_PORTABLE_RUNTIME)
500 warning (0, "PIC code generation is not supported in the portable runtime model");
503 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
505 warning (0, "PIC code generation is not compatible with fast indirect calls");
508 if (! TARGET_GAS && write_symbols != NO_DEBUG)
510 warning (0, "-g is only supported when using GAS on this processor,");
511 warning (0, "-g option disabled");
512 write_symbols = NO_DEBUG;
515 /* We only support the "big PIC" model now. And we always generate PIC
516 code when in 64bit mode. */
517 if (flag_pic == 1 || TARGET_64BIT)
520 /* We can't guarantee that .dword is available for 32-bit targets. */
521 if (UNITS_PER_WORD == 4)
522 targetm.asm_out.aligned_op.di = NULL;
524 /* The unaligned ops are only available when using GAS. */
527 targetm.asm_out.unaligned_op.hi = NULL;
528 targetm.asm_out.unaligned_op.si = NULL;
529 targetm.asm_out.unaligned_op.di = NULL;
532 init_machine_status = pa_init_machine_status;
536 pa_init_builtins (void)
538 #ifdef DONT_HAVE_FPUTC_UNLOCKED
539 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
540 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
541 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
542 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
545 if (built_in_decls [BUILT_IN_FINITE])
546 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
547 if (built_in_decls [BUILT_IN_FINITEF])
548 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
552 /* Function to init struct machine_function.
553 This will be called, via a pointer variable,
554 from push_function_context. */
556 static struct machine_function *
557 pa_init_machine_status (void)
559 return GGC_CNEW (machine_function);
562 /* If FROM is a probable pointer register, mark TO as a probable
563 pointer register with the same pointer alignment as FROM. */
566 copy_reg_pointer (rtx to, rtx from)
568 if (REG_POINTER (from))
569 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
572 /* Return 1 if X contains a symbolic expression. We know these
573 expressions will have one of a few well defined forms, so
574 we need only check those forms. */
576 symbolic_expression_p (rtx x)
579 /* Strip off any HIGH. */
580 if (GET_CODE (x) == HIGH)
583 return (symbolic_operand (x, VOIDmode));
586 /* Accept any constant that can be moved in one instruction into a
589 cint_ok_for_move (HOST_WIDE_INT ival)
591 /* OK if ldo, ldil, or zdepi, can be used. */
592 return (VAL_14_BITS_P (ival)
593 || ldil_cint_p (ival)
594 || zdepi_cint_p (ival));
597 /* Return truth value of whether OP can be used as an operand in a
600 adddi3_operand (rtx op, enum machine_mode mode)
602 return (register_operand (op, mode)
603 || (GET_CODE (op) == CONST_INT
604 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
607 /* True iff the operand OP can be used as the destination operand of
608 an integer store. This also implies the operand could be used as
609 the source operand of an integer load. Symbolic, lo_sum and indexed
610 memory operands are not allowed. We accept reloading pseudos and
611 other memory operands. */
613 integer_store_memory_operand (rtx op, enum machine_mode mode)
615 return ((reload_in_progress
617 && REGNO (op) >= FIRST_PSEUDO_REGISTER
618 && reg_renumber [REGNO (op)] < 0)
619 || (GET_CODE (op) == MEM
620 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
621 && !symbolic_memory_operand (op, VOIDmode)
622 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
623 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
626 /* True iff ldil can be used to load this CONST_INT. The least
627 significant 11 bits of the value must be zero and the value must
628 not change sign when extended from 32 to 64 bits. */
630 ldil_cint_p (HOST_WIDE_INT ival)
632 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
634 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
637 /* True iff zdepi can be used to generate this CONST_INT.
638 zdepi first sign extends a 5-bit signed number to a given field
639 length, then places this field anywhere in a zero. */
641 zdepi_cint_p (unsigned HOST_WIDE_INT x)
643 unsigned HOST_WIDE_INT lsb_mask, t;
645 /* This might not be obvious, but it's at least fast.
646 This function is critical; we don't have the time loops would take. */
648 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
649 /* Return true iff t is a power of two. */
650 return ((t & (t - 1)) == 0);
653 /* True iff depi or extru can be used to compute (reg & mask).
654 Accept bit pattern like these:
659 and_mask_p (unsigned HOST_WIDE_INT mask)
662 mask += mask & -mask;
663 return (mask & (mask - 1)) == 0;
666 /* True iff depi can be used to compute (reg | MASK). */
668 ior_mask_p (unsigned HOST_WIDE_INT mask)
670 mask += mask & -mask;
671 return (mask & (mask - 1)) == 0;
674 /* Legitimize PIC addresses. If the address is already
675 position-independent, we return ORIG. Newly generated
676 position-independent addresses go to REG. If we need more
677 than one register, we lose. */
680 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
684 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
686 /* Labels need special handling. */
687 if (pic_label_operand (orig, mode))
691 /* We do not want to go through the movXX expanders here since that
692 would create recursion.
694 Nor do we really want to call a generator for a named pattern
695 since that requires multiple patterns if we want to support
698 So instead we just emit the raw set, which avoids the movXX
699 expanders completely. */
700 mark_reg_pointer (reg, BITS_PER_UNIT);
701 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
703 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
704 add_reg_note (insn, REG_EQUAL, orig);
706 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
707 and update LABEL_NUSES because this is not done automatically. */
708 if (reload_in_progress || reload_completed)
710 /* Extract LABEL_REF. */
711 if (GET_CODE (orig) == CONST)
712 orig = XEXP (XEXP (orig, 0), 0);
713 /* Extract CODE_LABEL. */
714 orig = XEXP (orig, 0);
715 add_reg_note (insn, REG_LABEL_OPERAND, orig);
716 LABEL_NUSES (orig)++;
718 crtl->uses_pic_offset_table = 1;
721 if (GET_CODE (orig) == SYMBOL_REF)
727 /* Before reload, allocate a temporary register for the intermediate
728 result. This allows the sequence to be deleted when the final
729 result is unused and the insns are trivially dead. */
730 tmp_reg = ((reload_in_progress || reload_completed)
731 ? reg : gen_reg_rtx (Pmode));
733 if (function_label_operand (orig, mode))
735 /* Force function label into memory in word mode. */
736 orig = XEXP (force_const_mem (word_mode, orig), 0);
737 /* Load plabel address from DLT. */
738 emit_move_insn (tmp_reg,
739 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
740 gen_rtx_HIGH (word_mode, orig)));
742 = gen_const_mem (Pmode,
743 gen_rtx_LO_SUM (Pmode, tmp_reg,
744 gen_rtx_UNSPEC (Pmode,
747 emit_move_insn (reg, pic_ref);
748 /* Now load address of function descriptor. */
749 pic_ref = gen_rtx_MEM (Pmode, reg);
753 /* Load symbol reference from DLT. */
754 emit_move_insn (tmp_reg,
755 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
756 gen_rtx_HIGH (word_mode, orig)));
758 = gen_const_mem (Pmode,
759 gen_rtx_LO_SUM (Pmode, tmp_reg,
760 gen_rtx_UNSPEC (Pmode,
765 crtl->uses_pic_offset_table = 1;
766 mark_reg_pointer (reg, BITS_PER_UNIT);
767 insn = emit_move_insn (reg, pic_ref);
769 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
770 set_unique_reg_note (insn, REG_EQUAL, orig);
774 else if (GET_CODE (orig) == CONST)
778 if (GET_CODE (XEXP (orig, 0)) == PLUS
779 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
783 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
785 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
786 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
787 base == reg ? 0 : reg);
789 if (GET_CODE (orig) == CONST_INT)
791 if (INT_14_BITS (orig))
792 return plus_constant (base, INTVAL (orig));
793 orig = force_reg (Pmode, orig);
795 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
796 /* Likewise, should we set special REG_NOTEs here? */
802 static GTY(()) rtx gen_tls_tga;
805 gen_tls_get_addr (void)
808 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
813 hppa_tls_call (rtx arg)
817 ret = gen_reg_rtx (Pmode);
818 emit_library_call_value (gen_tls_get_addr (), ret,
819 LCT_CONST, Pmode, 1, arg, Pmode);
825 legitimize_tls_address (rtx addr)
827 rtx ret, insn, tmp, t1, t2, tp;
828 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
832 case TLS_MODEL_GLOBAL_DYNAMIC:
833 tmp = gen_reg_rtx (Pmode);
835 emit_insn (gen_tgd_load_pic (tmp, addr));
837 emit_insn (gen_tgd_load (tmp, addr));
838 ret = hppa_tls_call (tmp);
841 case TLS_MODEL_LOCAL_DYNAMIC:
842 ret = gen_reg_rtx (Pmode);
843 tmp = gen_reg_rtx (Pmode);
846 emit_insn (gen_tld_load_pic (tmp, addr));
848 emit_insn (gen_tld_load (tmp, addr));
849 t1 = hppa_tls_call (tmp);
852 t2 = gen_reg_rtx (Pmode);
853 emit_libcall_block (insn, t2, t1,
854 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
856 emit_insn (gen_tld_offset_load (ret, addr, t2));
859 case TLS_MODEL_INITIAL_EXEC:
860 tp = gen_reg_rtx (Pmode);
861 tmp = gen_reg_rtx (Pmode);
862 ret = gen_reg_rtx (Pmode);
863 emit_insn (gen_tp_load (tp));
865 emit_insn (gen_tie_load_pic (tmp, addr));
867 emit_insn (gen_tie_load (tmp, addr));
868 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
871 case TLS_MODEL_LOCAL_EXEC:
872 tp = gen_reg_rtx (Pmode);
873 ret = gen_reg_rtx (Pmode);
874 emit_insn (gen_tp_load (tp));
875 emit_insn (gen_tle_load (ret, addr, tp));
885 /* Try machine-dependent ways of modifying an illegitimate address
886 to be legitimate. If we find one, return the new, valid address.
887 This macro is used in only one place: `memory_address' in explow.c.
889 OLDX is the address as it was before break_out_memory_refs was called.
890 In some cases it is useful to look at this to decide what needs to be done.
892 It is always safe for this macro to do nothing. It exists to recognize
893 opportunities to optimize the output.
895 For the PA, transform:
897 memory(X + <large int>)
901 if (<large int> & mask) >= 16
902 Y = (<large int> & ~mask) + mask + 1 Round up.
904 Y = (<large int> & ~mask) Round down.
906 memory (Z + (<large int> - Y));
908 This is for CSE to find several similar references, and only use one Z.
910 X can either be a SYMBOL_REF or REG, but because combine cannot
911 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
912 D will not fit in 14 bits.
914 MODE_FLOAT references allow displacements which fit in 5 bits, so use
917 MODE_INT references allow displacements which fit in 14 bits, so use
920 This relies on the fact that most mode MODE_FLOAT references will use FP
921 registers and most mode MODE_INT references will use integer registers.
922 (In the rare case of an FP register used in an integer MODE, we depend
923 on secondary reloads to clean things up.)
926 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
927 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
928 addressing modes to be used).
930 Put X and Z into registers. Then put the entire expression into
934 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
935 enum machine_mode mode)
939 /* We need to canonicalize the order of operands in unscaled indexed
940 addresses since the code that checks if an address is valid doesn't
941 always try both orders. */
942 if (!TARGET_NO_SPACE_REGS
943 && GET_CODE (x) == PLUS
944 && GET_MODE (x) == Pmode
945 && REG_P (XEXP (x, 0))
946 && REG_P (XEXP (x, 1))
947 && REG_POINTER (XEXP (x, 0))
948 && !REG_POINTER (XEXP (x, 1)))
949 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
951 if (PA_SYMBOL_REF_TLS_P (x))
952 return legitimize_tls_address (x);
954 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
956 /* Strip off CONST. */
957 if (GET_CODE (x) == CONST)
960 /* Special case. Get the SYMBOL_REF into a register and use indexing.
961 That should always be safe. */
962 if (GET_CODE (x) == PLUS
963 && GET_CODE (XEXP (x, 0)) == REG
964 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
966 rtx reg = force_reg (Pmode, XEXP (x, 1));
967 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
970 /* Note we must reject symbols which represent function addresses
971 since the assembler/linker can't handle arithmetic on plabels. */
972 if (GET_CODE (x) == PLUS
973 && GET_CODE (XEXP (x, 1)) == CONST_INT
974 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
975 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
976 || GET_CODE (XEXP (x, 0)) == REG))
978 rtx int_part, ptr_reg;
980 int offset = INTVAL (XEXP (x, 1));
983 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
984 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
986 /* Choose which way to round the offset. Round up if we
987 are >= halfway to the next boundary. */
988 if ((offset & mask) >= ((mask + 1) / 2))
989 newoffset = (offset & ~ mask) + mask + 1;
991 newoffset = (offset & ~ mask);
993 /* If the newoffset will not fit in 14 bits (ldo), then
994 handling this would take 4 or 5 instructions (2 to load
995 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
996 add the new offset and the SYMBOL_REF.) Combine can
997 not handle 4->2 or 5->2 combinations, so do not create
999 if (! VAL_14_BITS_P (newoffset)
1000 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1002 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1005 gen_rtx_HIGH (Pmode, const_part));
1008 gen_rtx_LO_SUM (Pmode,
1009 tmp_reg, const_part));
1013 if (! VAL_14_BITS_P (newoffset))
1014 int_part = force_reg (Pmode, GEN_INT (newoffset));
1016 int_part = GEN_INT (newoffset);
1018 ptr_reg = force_reg (Pmode,
1019 gen_rtx_PLUS (Pmode,
1020 force_reg (Pmode, XEXP (x, 0)),
1023 return plus_constant (ptr_reg, offset - newoffset);
1026 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1028 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1029 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1030 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1031 && (OBJECT_P (XEXP (x, 1))
1032 || GET_CODE (XEXP (x, 1)) == SUBREG)
1033 && GET_CODE (XEXP (x, 1)) != CONST)
1035 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1039 if (GET_CODE (reg1) != REG)
1040 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1042 reg2 = XEXP (XEXP (x, 0), 0);
1043 if (GET_CODE (reg2) != REG)
1044 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1046 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1047 gen_rtx_MULT (Pmode,
1053 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1055 Only do so for floating point modes since this is more speculative
1056 and we lose if it's an integer store. */
1057 if (GET_CODE (x) == PLUS
1058 && GET_CODE (XEXP (x, 0)) == PLUS
1059 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1060 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1061 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1062 && (mode == SFmode || mode == DFmode))
1065 /* First, try and figure out what to use as a base register. */
1066 rtx reg1, reg2, base, idx, orig_base;
1068 reg1 = XEXP (XEXP (x, 0), 1);
1073 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1074 then emit_move_sequence will turn on REG_POINTER so we'll know
1075 it's a base register below. */
1076 if (GET_CODE (reg1) != REG)
1077 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1079 if (GET_CODE (reg2) != REG)
1080 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1082 /* Figure out what the base and index are. */
1084 if (GET_CODE (reg1) == REG
1085 && REG_POINTER (reg1))
1088 orig_base = XEXP (XEXP (x, 0), 1);
1089 idx = gen_rtx_PLUS (Pmode,
1090 gen_rtx_MULT (Pmode,
1091 XEXP (XEXP (XEXP (x, 0), 0), 0),
1092 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1095 else if (GET_CODE (reg2) == REG
1096 && REG_POINTER (reg2))
1099 orig_base = XEXP (x, 1);
1106 /* If the index adds a large constant, try to scale the
1107 constant so that it can be loaded with only one insn. */
1108 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1109 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1110 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1111 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1113 /* Divide the CONST_INT by the scale factor, then add it to A. */
1114 int val = INTVAL (XEXP (idx, 1));
1116 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1117 reg1 = XEXP (XEXP (idx, 0), 0);
1118 if (GET_CODE (reg1) != REG)
1119 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1121 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1123 /* We can now generate a simple scaled indexed address. */
1126 (Pmode, gen_rtx_PLUS (Pmode,
1127 gen_rtx_MULT (Pmode, reg1,
1128 XEXP (XEXP (idx, 0), 1)),
1132 /* If B + C is still a valid base register, then add them. */
1133 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1134 && INTVAL (XEXP (idx, 1)) <= 4096
1135 && INTVAL (XEXP (idx, 1)) >= -4096)
1137 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1140 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1142 reg2 = XEXP (XEXP (idx, 0), 0);
1143 if (GET_CODE (reg2) != CONST_INT)
1144 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1146 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1147 gen_rtx_MULT (Pmode,
1153 /* Get the index into a register, then add the base + index and
1154 return a register holding the result. */
1156 /* First get A into a register. */
1157 reg1 = XEXP (XEXP (idx, 0), 0);
1158 if (GET_CODE (reg1) != REG)
1159 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1161 /* And get B into a register. */
1162 reg2 = XEXP (idx, 1);
1163 if (GET_CODE (reg2) != REG)
1164 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1166 reg1 = force_reg (Pmode,
1167 gen_rtx_PLUS (Pmode,
1168 gen_rtx_MULT (Pmode, reg1,
1169 XEXP (XEXP (idx, 0), 1)),
1172 /* Add the result to our base register and return. */
1173 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1177 /* Uh-oh. We might have an address for x[n-100000]. This needs
1178 special handling to avoid creating an indexed memory address
1179 with x-100000 as the base.
1181 If the constant part is small enough, then it's still safe because
1182 there is a guard page at the beginning and end of the data segment.
1184 Scaled references are common enough that we want to try and rearrange the
1185 terms so that we can use indexing for these addresses too. Only
1186 do the optimization for floatint point modes. */
1188 if (GET_CODE (x) == PLUS
1189 && symbolic_expression_p (XEXP (x, 1)))
1191 /* Ugly. We modify things here so that the address offset specified
1192 by the index expression is computed first, then added to x to form
1193 the entire address. */
1195 rtx regx1, regx2, regy1, regy2, y;
1197 /* Strip off any CONST. */
1199 if (GET_CODE (y) == CONST)
1202 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1204 /* See if this looks like
1205 (plus (mult (reg) (shadd_const))
1206 (const (plus (symbol_ref) (const_int))))
1208 Where const_int is small. In that case the const
1209 expression is a valid pointer for indexing.
1211 If const_int is big, but can be divided evenly by shadd_const
1212 and added to (reg). This allows more scaled indexed addresses. */
1213 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1214 && GET_CODE (XEXP (x, 0)) == MULT
1215 && GET_CODE (XEXP (y, 1)) == CONST_INT
1216 && INTVAL (XEXP (y, 1)) >= -4096
1217 && INTVAL (XEXP (y, 1)) <= 4095
1218 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1219 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1221 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1225 if (GET_CODE (reg1) != REG)
1226 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1228 reg2 = XEXP (XEXP (x, 0), 0);
1229 if (GET_CODE (reg2) != REG)
1230 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1232 return force_reg (Pmode,
1233 gen_rtx_PLUS (Pmode,
1234 gen_rtx_MULT (Pmode,
1239 else if ((mode == DFmode || mode == SFmode)
1240 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1241 && GET_CODE (XEXP (x, 0)) == MULT
1242 && GET_CODE (XEXP (y, 1)) == CONST_INT
1243 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1244 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1245 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1248 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1249 / INTVAL (XEXP (XEXP (x, 0), 1))));
1250 regx2 = XEXP (XEXP (x, 0), 0);
1251 if (GET_CODE (regx2) != REG)
1252 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1253 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1257 gen_rtx_PLUS (Pmode,
1258 gen_rtx_MULT (Pmode, regx2,
1259 XEXP (XEXP (x, 0), 1)),
1260 force_reg (Pmode, XEXP (y, 0))));
1262 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1263 && INTVAL (XEXP (y, 1)) >= -4096
1264 && INTVAL (XEXP (y, 1)) <= 4095)
1266 /* This is safe because of the guard page at the
1267 beginning and end of the data space. Just
1268 return the original address. */
1273 /* Doesn't look like one we can optimize. */
1274 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1275 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1276 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1277 regx1 = force_reg (Pmode,
1278 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1280 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1288 /* For the HPPA, REG and REG+CONST is cost 0
1289 and addresses involving symbolic constants are cost 2.
1291 PIC addresses are very expensive.
1293 It is no coincidence that this has the same structure
1294 as GO_IF_LEGITIMATE_ADDRESS. */
1297 hppa_address_cost (rtx X,
1298 bool speed ATTRIBUTE_UNUSED)
1300 switch (GET_CODE (X))
1313 /* Compute a (partial) cost for rtx X. Return true if the complete
1314 cost has been computed, and false if subexpressions should be
1315 scanned. In either case, *TOTAL contains the cost result. */
1318 hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1319 bool speed ATTRIBUTE_UNUSED)
1324 if (INTVAL (x) == 0)
1326 else if (INT_14_BITS (x))
1343 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1344 && outer_code != SET)
1351 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1352 *total = COSTS_N_INSNS (3);
1353 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1354 *total = COSTS_N_INSNS (8);
1356 *total = COSTS_N_INSNS (20);
1360 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1362 *total = COSTS_N_INSNS (14);
1370 *total = COSTS_N_INSNS (60);
1373 case PLUS: /* this includes shNadd insns */
1375 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1376 *total = COSTS_N_INSNS (3);
1378 *total = COSTS_N_INSNS (1);
1384 *total = COSTS_N_INSNS (1);
1392 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1393 new rtx with the correct mode. */
1395 force_mode (enum machine_mode mode, rtx orig)
1397 if (mode == GET_MODE (orig))
1400 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1402 return gen_rtx_REG (mode, REGNO (orig));
1405 /* Return 1 if *X is a thread-local symbol. */
1408 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1410 return PA_SYMBOL_REF_TLS_P (*x);
1413 /* Return 1 if X contains a thread-local symbol. */
1416 pa_tls_referenced_p (rtx x)
1418 if (!TARGET_HAVE_TLS)
1421 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1424 /* Emit insns to move operands[1] into operands[0].
1426 Return 1 if we have written out everything that needs to be done to
1427 do the move. Otherwise, return 0 and the caller will emit the move
1430 Note SCRATCH_REG may not be in the proper mode depending on how it
1431 will be used. This routine is responsible for creating a new copy
1432 of SCRATCH_REG in the proper mode. */
1435 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1437 register rtx operand0 = operands[0];
1438 register rtx operand1 = operands[1];
1441 /* We can only handle indexed addresses in the destination operand
1442 of floating point stores. Thus, we need to break out indexed
1443 addresses from the destination operand. */
1444 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1446 gcc_assert (can_create_pseudo_p ());
1448 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1449 operand0 = replace_equiv_address (operand0, tem);
1452 /* On targets with non-equivalent space registers, break out unscaled
1453 indexed addresses from the source operand before the final CSE.
1454 We have to do this because the REG_POINTER flag is not correctly
1455 carried through various optimization passes and CSE may substitute
1456 a pseudo without the pointer set for one with the pointer set. As
1457 a result, we loose various opportunities to create insns with
1458 unscaled indexed addresses. */
1459 if (!TARGET_NO_SPACE_REGS
1460 && !cse_not_expected
1461 && GET_CODE (operand1) == MEM
1462 && GET_CODE (XEXP (operand1, 0)) == PLUS
1463 && REG_P (XEXP (XEXP (operand1, 0), 0))
1464 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1466 = replace_equiv_address (operand1,
1467 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1470 && reload_in_progress && GET_CODE (operand0) == REG
1471 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1472 operand0 = reg_equiv_mem[REGNO (operand0)];
1473 else if (scratch_reg
1474 && reload_in_progress && GET_CODE (operand0) == SUBREG
1475 && GET_CODE (SUBREG_REG (operand0)) == REG
1476 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1478 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1479 the code which tracks sets/uses for delete_output_reload. */
1480 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1481 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1482 SUBREG_BYTE (operand0));
1483 operand0 = alter_subreg (&temp);
1487 && reload_in_progress && GET_CODE (operand1) == REG
1488 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1489 operand1 = reg_equiv_mem[REGNO (operand1)];
1490 else if (scratch_reg
1491 && reload_in_progress && GET_CODE (operand1) == SUBREG
1492 && GET_CODE (SUBREG_REG (operand1)) == REG
1493 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1495 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1496 the code which tracks sets/uses for delete_output_reload. */
1497 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1498 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1499 SUBREG_BYTE (operand1));
1500 operand1 = alter_subreg (&temp);
1503 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1504 && ((tem = find_replacement (&XEXP (operand0, 0)))
1505 != XEXP (operand0, 0)))
1506 operand0 = replace_equiv_address (operand0, tem);
1508 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1509 && ((tem = find_replacement (&XEXP (operand1, 0)))
1510 != XEXP (operand1, 0)))
1511 operand1 = replace_equiv_address (operand1, tem);
1513 /* Handle secondary reloads for loads/stores of FP registers from
1514 REG+D addresses where D does not fit in 5 or 14 bits, including
1515 (subreg (mem (addr))) cases. */
1517 && fp_reg_operand (operand0, mode)
1518 && ((GET_CODE (operand1) == MEM
1519 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1520 XEXP (operand1, 0)))
1521 || ((GET_CODE (operand1) == SUBREG
1522 && GET_CODE (XEXP (operand1, 0)) == MEM
1523 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1525 XEXP (XEXP (operand1, 0), 0))))))
1527 if (GET_CODE (operand1) == SUBREG)
1528 operand1 = XEXP (operand1, 0);
1530 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1531 it in WORD_MODE regardless of what mode it was originally given
1533 scratch_reg = force_mode (word_mode, scratch_reg);
1535 /* D might not fit in 14 bits either; for such cases load D into
1537 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1539 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1540 emit_move_insn (scratch_reg,
1541 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1543 XEXP (XEXP (operand1, 0), 0),
1547 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1548 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1549 replace_equiv_address (operand1, scratch_reg)));
1552 else if (scratch_reg
1553 && fp_reg_operand (operand1, mode)
1554 && ((GET_CODE (operand0) == MEM
1555 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1557 XEXP (operand0, 0)))
1558 || ((GET_CODE (operand0) == SUBREG)
1559 && GET_CODE (XEXP (operand0, 0)) == MEM
1560 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1562 XEXP (XEXP (operand0, 0), 0)))))
1564 if (GET_CODE (operand0) == SUBREG)
1565 operand0 = XEXP (operand0, 0);
1567 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1568 it in WORD_MODE regardless of what mode it was originally given
1570 scratch_reg = force_mode (word_mode, scratch_reg);
1572 /* D might not fit in 14 bits either; for such cases load D into
1574 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1576 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1577 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1580 XEXP (XEXP (operand0, 0),
1585 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1586 emit_insn (gen_rtx_SET (VOIDmode,
1587 replace_equiv_address (operand0, scratch_reg),
1591 /* Handle secondary reloads for loads of FP registers from constant
1592 expressions by forcing the constant into memory.
1594 Use scratch_reg to hold the address of the memory location.
1596 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1597 NO_REGS when presented with a const_int and a register class
1598 containing only FP registers. Doing so unfortunately creates
1599 more problems than it solves. Fix this for 2.5. */
1600 else if (scratch_reg
1601 && CONSTANT_P (operand1)
1602 && fp_reg_operand (operand0, mode))
1604 rtx const_mem, xoperands[2];
1606 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1607 it in WORD_MODE regardless of what mode it was originally given
1609 scratch_reg = force_mode (word_mode, scratch_reg);
1611 /* Force the constant into memory and put the address of the
1612 memory location into scratch_reg. */
1613 const_mem = force_const_mem (mode, operand1);
1614 xoperands[0] = scratch_reg;
1615 xoperands[1] = XEXP (const_mem, 0);
1616 emit_move_sequence (xoperands, Pmode, 0);
1618 /* Now load the destination register. */
1619 emit_insn (gen_rtx_SET (mode, operand0,
1620 replace_equiv_address (const_mem, scratch_reg)));
1623 /* Handle secondary reloads for SAR. These occur when trying to load
1624 the SAR from memory, FP register, or with a constant. */
1625 else if (scratch_reg
1626 && GET_CODE (operand0) == REG
1627 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1628 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1629 && (GET_CODE (operand1) == MEM
1630 || GET_CODE (operand1) == CONST_INT
1631 || (GET_CODE (operand1) == REG
1632 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1634 /* D might not fit in 14 bits either; for such cases load D into
1636 if (GET_CODE (operand1) == MEM
1637 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1639 /* We are reloading the address into the scratch register, so we
1640 want to make sure the scratch register is a full register. */
1641 scratch_reg = force_mode (word_mode, scratch_reg);
1643 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1644 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1647 XEXP (XEXP (operand1, 0),
1651 /* Now we are going to load the scratch register from memory,
1652 we want to load it in the same width as the original MEM,
1653 which must be the same as the width of the ultimate destination,
1655 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1657 emit_move_insn (scratch_reg,
1658 replace_equiv_address (operand1, scratch_reg));
1662 /* We want to load the scratch register using the same mode as
1663 the ultimate destination. */
1664 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1666 emit_move_insn (scratch_reg, operand1);
1669 /* And emit the insn to set the ultimate destination. We know that
1670 the scratch register has the same mode as the destination at this
1672 emit_move_insn (operand0, scratch_reg);
1675 /* Handle the most common case: storing into a register. */
1676 else if (register_operand (operand0, mode))
1678 if (register_operand (operand1, mode)
1679 || (GET_CODE (operand1) == CONST_INT
1680 && cint_ok_for_move (INTVAL (operand1)))
1681 || (operand1 == CONST0_RTX (mode))
1682 || (GET_CODE (operand1) == HIGH
1683 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1684 /* Only `general_operands' can come here, so MEM is ok. */
1685 || GET_CODE (operand1) == MEM)
1687 /* Various sets are created during RTL generation which don't
1688 have the REG_POINTER flag correctly set. After the CSE pass,
1689 instruction recognition can fail if we don't consistently
1690 set this flag when performing register copies. This should
1691 also improve the opportunities for creating insns that use
1692 unscaled indexing. */
1693 if (REG_P (operand0) && REG_P (operand1))
1695 if (REG_POINTER (operand1)
1696 && !REG_POINTER (operand0)
1697 && !HARD_REGISTER_P (operand0))
1698 copy_reg_pointer (operand0, operand1);
1699 else if (REG_POINTER (operand0)
1700 && !REG_POINTER (operand1)
1701 && !HARD_REGISTER_P (operand1))
1702 copy_reg_pointer (operand1, operand0);
1705 /* When MEMs are broken out, the REG_POINTER flag doesn't
1706 get set. In some cases, we can set the REG_POINTER flag
1707 from the declaration for the MEM. */
1708 if (REG_P (operand0)
1709 && GET_CODE (operand1) == MEM
1710 && !REG_POINTER (operand0))
1712 tree decl = MEM_EXPR (operand1);
1714 /* Set the register pointer flag and register alignment
1715 if the declaration for this memory reference is a
1716 pointer type. Fortran indirect argument references
1719 && !(flag_argument_noalias > 1
1720 && TREE_CODE (decl) == INDIRECT_REF
1721 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1725 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1727 if (TREE_CODE (decl) == COMPONENT_REF)
1728 decl = TREE_OPERAND (decl, 1);
1730 type = TREE_TYPE (decl);
1731 type = strip_array_types (type);
1733 if (POINTER_TYPE_P (type))
1737 type = TREE_TYPE (type);
1738 /* Using TYPE_ALIGN_OK is rather conservative as
1739 only the ada frontend actually sets it. */
1740 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1742 mark_reg_pointer (operand0, align);
1747 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1751 else if (GET_CODE (operand0) == MEM)
1753 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1754 && !(reload_in_progress || reload_completed))
1756 rtx temp = gen_reg_rtx (DFmode);
1758 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1759 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1762 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1764 /* Run this case quickly. */
1765 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1768 if (! (reload_in_progress || reload_completed))
1770 operands[0] = validize_mem (operand0);
1771 operands[1] = operand1 = force_reg (mode, operand1);
1775 /* Simplify the source if we need to.
1776 Note we do have to handle function labels here, even though we do
1777 not consider them legitimate constants. Loop optimizations can
1778 call the emit_move_xxx with one as a source. */
1779 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1780 || function_label_operand (operand1, mode)
1781 || (GET_CODE (operand1) == HIGH
1782 && symbolic_operand (XEXP (operand1, 0), mode)))
1786 if (GET_CODE (operand1) == HIGH)
1789 operand1 = XEXP (operand1, 0);
1791 if (symbolic_operand (operand1, mode))
1793 /* Argh. The assembler and linker can't handle arithmetic
1796 So we force the plabel into memory, load operand0 from
1797 the memory location, then add in the constant part. */
1798 if ((GET_CODE (operand1) == CONST
1799 && GET_CODE (XEXP (operand1, 0)) == PLUS
1800 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1801 || function_label_operand (operand1, mode))
1803 rtx temp, const_part;
1805 /* Figure out what (if any) scratch register to use. */
1806 if (reload_in_progress || reload_completed)
1808 scratch_reg = scratch_reg ? scratch_reg : operand0;
1809 /* SCRATCH_REG will hold an address and maybe the actual
1810 data. We want it in WORD_MODE regardless of what mode it
1811 was originally given to us. */
1812 scratch_reg = force_mode (word_mode, scratch_reg);
1815 scratch_reg = gen_reg_rtx (Pmode);
1817 if (GET_CODE (operand1) == CONST)
1819 /* Save away the constant part of the expression. */
1820 const_part = XEXP (XEXP (operand1, 0), 1);
1821 gcc_assert (GET_CODE (const_part) == CONST_INT);
1823 /* Force the function label into memory. */
1824 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1828 /* No constant part. */
1829 const_part = NULL_RTX;
1831 /* Force the function label into memory. */
1832 temp = force_const_mem (mode, operand1);
1836 /* Get the address of the memory location. PIC-ify it if
1838 temp = XEXP (temp, 0);
1840 temp = legitimize_pic_address (temp, mode, scratch_reg);
1842 /* Put the address of the memory location into our destination
1845 emit_move_sequence (operands, mode, scratch_reg);
1847 /* Now load from the memory location into our destination
1849 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1850 emit_move_sequence (operands, mode, scratch_reg);
1852 /* And add back in the constant part. */
1853 if (const_part != NULL_RTX)
1854 expand_inc (operand0, const_part);
1863 if (reload_in_progress || reload_completed)
1865 temp = scratch_reg ? scratch_reg : operand0;
1866 /* TEMP will hold an address and maybe the actual
1867 data. We want it in WORD_MODE regardless of what mode it
1868 was originally given to us. */
1869 temp = force_mode (word_mode, temp);
1872 temp = gen_reg_rtx (Pmode);
1874 /* (const (plus (symbol) (const_int))) must be forced to
1875 memory during/after reload if the const_int will not fit
1877 if (GET_CODE (operand1) == CONST
1878 && GET_CODE (XEXP (operand1, 0)) == PLUS
1879 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1880 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1881 && (reload_completed || reload_in_progress)
1884 rtx const_mem = force_const_mem (mode, operand1);
1885 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1887 operands[1] = replace_equiv_address (const_mem, operands[1]);
1888 emit_move_sequence (operands, mode, temp);
1892 operands[1] = legitimize_pic_address (operand1, mode, temp);
1893 if (REG_P (operand0) && REG_P (operands[1]))
1894 copy_reg_pointer (operand0, operands[1]);
1895 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1898 /* On the HPPA, references to data space are supposed to use dp,
1899 register 27, but showing it in the RTL inhibits various cse
1900 and loop optimizations. */
1905 if (reload_in_progress || reload_completed)
1907 temp = scratch_reg ? scratch_reg : operand0;
1908 /* TEMP will hold an address and maybe the actual
1909 data. We want it in WORD_MODE regardless of what mode it
1910 was originally given to us. */
1911 temp = force_mode (word_mode, temp);
1914 temp = gen_reg_rtx (mode);
1916 /* Loading a SYMBOL_REF into a register makes that register
1917 safe to be used as the base in an indexed address.
1919 Don't mark hard registers though. That loses. */
1920 if (GET_CODE (operand0) == REG
1921 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1922 mark_reg_pointer (operand0, BITS_PER_UNIT);
1923 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1924 mark_reg_pointer (temp, BITS_PER_UNIT);
1927 set = gen_rtx_SET (mode, operand0, temp);
1929 set = gen_rtx_SET (VOIDmode,
1931 gen_rtx_LO_SUM (mode, temp, operand1));
1933 emit_insn (gen_rtx_SET (VOIDmode,
1935 gen_rtx_HIGH (mode, operand1)));
1941 else if (pa_tls_referenced_p (operand1))
1946 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1948 addend = XEXP (XEXP (tmp, 0), 1);
1949 tmp = XEXP (XEXP (tmp, 0), 0);
1952 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1953 tmp = legitimize_tls_address (tmp);
1956 tmp = gen_rtx_PLUS (mode, tmp, addend);
1957 tmp = force_operand (tmp, operands[0]);
1961 else if (GET_CODE (operand1) != CONST_INT
1962 || !cint_ok_for_move (INTVAL (operand1)))
1966 HOST_WIDE_INT value = 0;
1967 HOST_WIDE_INT insv = 0;
1970 if (GET_CODE (operand1) == CONST_INT)
1971 value = INTVAL (operand1);
1974 && GET_CODE (operand1) == CONST_INT
1975 && HOST_BITS_PER_WIDE_INT > 32
1976 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1980 /* Extract the low order 32 bits of the value and sign extend.
1981 If the new value is the same as the original value, we can
1982 can use the original value as-is. If the new value is
1983 different, we use it and insert the most-significant 32-bits
1984 of the original value into the final result. */
1985 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1986 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1989 #if HOST_BITS_PER_WIDE_INT > 32
1990 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1994 operand1 = GEN_INT (nval);
1998 if (reload_in_progress || reload_completed)
1999 temp = scratch_reg ? scratch_reg : operand0;
2001 temp = gen_reg_rtx (mode);
2003 /* We don't directly split DImode constants on 32-bit targets
2004 because PLUS uses an 11-bit immediate and the insn sequence
2005 generated is not as efficient as the one using HIGH/LO_SUM. */
2006 if (GET_CODE (operand1) == CONST_INT
2007 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2008 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2011 /* Directly break constant into high and low parts. This
2012 provides better optimization opportunities because various
2013 passes recognize constants split with PLUS but not LO_SUM.
2014 We use a 14-bit signed low part except when the addition
2015 of 0x4000 to the high part might change the sign of the
2017 HOST_WIDE_INT low = value & 0x3fff;
2018 HOST_WIDE_INT high = value & ~ 0x3fff;
2022 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2030 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2031 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2035 emit_insn (gen_rtx_SET (VOIDmode, temp,
2036 gen_rtx_HIGH (mode, operand1)));
2037 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2040 insn = emit_move_insn (operands[0], operands[1]);
2042 /* Now insert the most significant 32 bits of the value
2043 into the register. When we don't have a second register
2044 available, it could take up to nine instructions to load
2045 a 64-bit integer constant. Prior to reload, we force
2046 constants that would take more than three instructions
2047 to load to the constant pool. During and after reload,
2048 we have to handle all possible values. */
2051 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2052 register and the value to be inserted is outside the
2053 range that can be loaded with three depdi instructions. */
2054 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2056 operand1 = GEN_INT (insv);
2058 emit_insn (gen_rtx_SET (VOIDmode, temp,
2059 gen_rtx_HIGH (mode, operand1)));
2060 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2061 emit_insn (gen_insv (operand0, GEN_INT (32),
2066 int len = 5, pos = 27;
2068 /* Insert the bits using the depdi instruction. */
2071 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2072 HOST_WIDE_INT sign = v5 < 0;
2074 /* Left extend the insertion. */
2075 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2076 while (pos > 0 && (insv & 1) == sign)
2078 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2083 emit_insn (gen_insv (operand0, GEN_INT (len),
2084 GEN_INT (pos), GEN_INT (v5)));
2086 len = pos > 0 && pos < 5 ? pos : 5;
2092 set_unique_reg_note (insn, REG_EQUAL, op1);
2097 /* Now have insn-emit do whatever it normally does. */
2101 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2102 it will need a link/runtime reloc). */
2105 reloc_needed (tree exp)
2109 switch (TREE_CODE (exp))
2114 case POINTER_PLUS_EXPR:
2117 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2118 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2122 case NON_LVALUE_EXPR:
2123 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2129 unsigned HOST_WIDE_INT ix;
2131 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2133 reloc |= reloc_needed (value);
2146 /* Does operand (which is a symbolic_operand) live in text space?
2147 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2151 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2153 if (GET_CODE (operand) == CONST)
2154 operand = XEXP (XEXP (operand, 0), 0);
2157 if (GET_CODE (operand) == SYMBOL_REF)
2158 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2162 if (GET_CODE (operand) == SYMBOL_REF)
2163 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2169 /* Return the best assembler insn template
2170 for moving operands[1] into operands[0] as a fullword. */
2172 singlemove_string (rtx *operands)
2174 HOST_WIDE_INT intval;
2176 if (GET_CODE (operands[0]) == MEM)
2177 return "stw %r1,%0";
2178 if (GET_CODE (operands[1]) == MEM)
2180 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2185 gcc_assert (GET_MODE (operands[1]) == SFmode);
2187 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2189 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2190 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2192 operands[1] = GEN_INT (i);
2193 /* Fall through to CONST_INT case. */
2195 if (GET_CODE (operands[1]) == CONST_INT)
2197 intval = INTVAL (operands[1]);
2199 if (VAL_14_BITS_P (intval))
2201 else if ((intval & 0x7ff) == 0)
2202 return "ldil L'%1,%0";
2203 else if (zdepi_cint_p (intval))
2204 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2206 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2208 return "copy %1,%0";
2212 /* Compute position (in OP[1]) and width (in OP[2])
2213 useful for copying IMM to a register using the zdepi
2214 instructions. Store the immediate value to insert in OP[0]. */
2216 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2220 /* Find the least significant set bit in IMM. */
2221 for (lsb = 0; lsb < 32; lsb++)
2228 /* Choose variants based on *sign* of the 5-bit field. */
2229 if ((imm & 0x10) == 0)
2230 len = (lsb <= 28) ? 4 : 32 - lsb;
2233 /* Find the width of the bitstring in IMM. */
2234 for (len = 5; len < 32 - lsb; len++)
2236 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2240 /* Sign extend IMM as a 5-bit value. */
2241 imm = (imm & 0xf) - 0x10;
2249 /* Compute position (in OP[1]) and width (in OP[2])
2250 useful for copying IMM to a register using the depdi,z
2251 instructions. Store the immediate value to insert in OP[0]. */
2253 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2255 int lsb, len, maxlen;
2257 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2259 /* Find the least significant set bit in IMM. */
2260 for (lsb = 0; lsb < maxlen; lsb++)
2267 /* Choose variants based on *sign* of the 5-bit field. */
2268 if ((imm & 0x10) == 0)
2269 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2272 /* Find the width of the bitstring in IMM. */
2273 for (len = 5; len < maxlen - lsb; len++)
2275 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2279 /* Extend length if host is narrow and IMM is negative. */
2280 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2283 /* Sign extend IMM as a 5-bit value. */
2284 imm = (imm & 0xf) - 0x10;
2292 /* Output assembler code to perform a doubleword move insn
2293 with operands OPERANDS. */
2296 output_move_double (rtx *operands)
2298 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2300 rtx addreg0 = 0, addreg1 = 0;
2302 /* First classify both operands. */
2304 if (REG_P (operands[0]))
2306 else if (offsettable_memref_p (operands[0]))
2308 else if (GET_CODE (operands[0]) == MEM)
2313 if (REG_P (operands[1]))
2315 else if (CONSTANT_P (operands[1]))
2317 else if (offsettable_memref_p (operands[1]))
2319 else if (GET_CODE (operands[1]) == MEM)
2324 /* Check for the cases that the operand constraints are not
2325 supposed to allow to happen. */
2326 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2328 /* Handle copies between general and floating registers. */
2330 if (optype0 == REGOP && optype1 == REGOP
2331 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2333 if (FP_REG_P (operands[0]))
2335 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2336 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2337 return "{fldds|fldd} -16(%%sp),%0";
2341 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2342 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2343 return "{ldws|ldw} -12(%%sp),%R0";
2347 /* Handle auto decrementing and incrementing loads and stores
2348 specifically, since the structure of the function doesn't work
2349 for them without major modification. Do it better when we learn
2350 this port about the general inc/dec addressing of PA.
2351 (This was written by tege. Chide him if it doesn't work.) */
2353 if (optype0 == MEMOP)
2355 /* We have to output the address syntax ourselves, since print_operand
2356 doesn't deal with the addresses we want to use. Fix this later. */
2358 rtx addr = XEXP (operands[0], 0);
2359 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2361 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2363 operands[0] = XEXP (addr, 0);
2364 gcc_assert (GET_CODE (operands[1]) == REG
2365 && GET_CODE (operands[0]) == REG);
2367 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2369 /* No overlap between high target register and address
2370 register. (We do this in a non-obvious way to
2371 save a register file writeback) */
2372 if (GET_CODE (addr) == POST_INC)
2373 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2374 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2376 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2378 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2380 operands[0] = XEXP (addr, 0);
2381 gcc_assert (GET_CODE (operands[1]) == REG
2382 && GET_CODE (operands[0]) == REG);
2384 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2385 /* No overlap between high target register and address
2386 register. (We do this in a non-obvious way to save a
2387 register file writeback) */
2388 if (GET_CODE (addr) == PRE_INC)
2389 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2390 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2393 if (optype1 == MEMOP)
2395 /* We have to output the address syntax ourselves, since print_operand
2396 doesn't deal with the addresses we want to use. Fix this later. */
2398 rtx addr = XEXP (operands[1], 0);
2399 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2401 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2403 operands[1] = XEXP (addr, 0);
2404 gcc_assert (GET_CODE (operands[0]) == REG
2405 && GET_CODE (operands[1]) == REG);
2407 if (!reg_overlap_mentioned_p (high_reg, addr))
2409 /* No overlap between high target register and address
2410 register. (We do this in a non-obvious way to
2411 save a register file writeback) */
2412 if (GET_CODE (addr) == POST_INC)
2413 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2414 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2418 /* This is an undefined situation. We should load into the
2419 address register *and* update that register. Probably
2420 we don't need to handle this at all. */
2421 if (GET_CODE (addr) == POST_INC)
2422 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2423 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2426 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2428 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2430 operands[1] = XEXP (addr, 0);
2431 gcc_assert (GET_CODE (operands[0]) == REG
2432 && GET_CODE (operands[1]) == REG);
2434 if (!reg_overlap_mentioned_p (high_reg, addr))
2436 /* No overlap between high target register and address
2437 register. (We do this in a non-obvious way to
2438 save a register file writeback) */
2439 if (GET_CODE (addr) == PRE_INC)
2440 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2441 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2445 /* This is an undefined situation. We should load into the
2446 address register *and* update that register. Probably
2447 we don't need to handle this at all. */
2448 if (GET_CODE (addr) == PRE_INC)
2449 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2450 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2453 else if (GET_CODE (addr) == PLUS
2454 && GET_CODE (XEXP (addr, 0)) == MULT)
2457 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2459 if (!reg_overlap_mentioned_p (high_reg, addr))
2461 xoperands[0] = high_reg;
2462 xoperands[1] = XEXP (addr, 1);
2463 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2464 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2465 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2467 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2471 xoperands[0] = high_reg;
2472 xoperands[1] = XEXP (addr, 1);
2473 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2474 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2475 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2477 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2482 /* If an operand is an unoffsettable memory ref, find a register
2483 we can increment temporarily to make it refer to the second word. */
2485 if (optype0 == MEMOP)
2486 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2488 if (optype1 == MEMOP)
2489 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2491 /* Ok, we can do one word at a time.
2492 Normally we do the low-numbered word first.
2494 In either case, set up in LATEHALF the operands to use
2495 for the high-numbered word and in some cases alter the
2496 operands in OPERANDS to be suitable for the low-numbered word. */
2498 if (optype0 == REGOP)
2499 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2500 else if (optype0 == OFFSOP)
2501 latehalf[0] = adjust_address (operands[0], SImode, 4);
2503 latehalf[0] = operands[0];
2505 if (optype1 == REGOP)
2506 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2507 else if (optype1 == OFFSOP)
2508 latehalf[1] = adjust_address (operands[1], SImode, 4);
2509 else if (optype1 == CNSTOP)
2510 split_double (operands[1], &operands[1], &latehalf[1]);
2512 latehalf[1] = operands[1];
2514 /* If the first move would clobber the source of the second one,
2515 do them in the other order.
2517 This can happen in two cases:
2519 mem -> register where the first half of the destination register
2520 is the same register used in the memory's address. Reload
2521 can create such insns.
2523 mem in this case will be either register indirect or register
2524 indirect plus a valid offset.
2526 register -> register move where REGNO(dst) == REGNO(src + 1)
2527 someone (Tim/Tege?) claimed this can happen for parameter loads.
2529 Handle mem -> register case first. */
2530 if (optype0 == REGOP
2531 && (optype1 == MEMOP || optype1 == OFFSOP)
2532 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2535 /* Do the late half first. */
2537 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2538 output_asm_insn (singlemove_string (latehalf), latehalf);
2542 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2543 return singlemove_string (operands);
2546 /* Now handle register -> register case. */
2547 if (optype0 == REGOP && optype1 == REGOP
2548 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2550 output_asm_insn (singlemove_string (latehalf), latehalf);
2551 return singlemove_string (operands);
2554 /* Normal case: do the two words, low-numbered first. */
2556 output_asm_insn (singlemove_string (operands), operands);
2558 /* Make any unoffsettable addresses point at high-numbered word. */
2560 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2562 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2565 output_asm_insn (singlemove_string (latehalf), latehalf);
2567 /* Undo the adds we just did. */
2569 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2571 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2577 output_fp_move_double (rtx *operands)
2579 if (FP_REG_P (operands[0]))
2581 if (FP_REG_P (operands[1])
2582 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2583 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2585 output_asm_insn ("fldd%F1 %1,%0", operands);
2587 else if (FP_REG_P (operands[1]))
2589 output_asm_insn ("fstd%F0 %1,%0", operands);
2595 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2597 /* This is a pain. You have to be prepared to deal with an
2598 arbitrary address here including pre/post increment/decrement.
2600 so avoid this in the MD. */
2601 gcc_assert (GET_CODE (operands[0]) == REG);
2603 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2604 xoperands[0] = operands[0];
2605 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2610 /* Return a REG that occurs in ADDR with coefficient 1.
2611 ADDR can be effectively incremented by incrementing REG. */
2614 find_addr_reg (rtx addr)
2616 while (GET_CODE (addr) == PLUS)
2618 if (GET_CODE (XEXP (addr, 0)) == REG)
2619 addr = XEXP (addr, 0);
2620 else if (GET_CODE (XEXP (addr, 1)) == REG)
2621 addr = XEXP (addr, 1);
2622 else if (CONSTANT_P (XEXP (addr, 0)))
2623 addr = XEXP (addr, 1);
2624 else if (CONSTANT_P (XEXP (addr, 1)))
2625 addr = XEXP (addr, 0);
2629 gcc_assert (GET_CODE (addr) == REG);
2633 /* Emit code to perform a block move.
2635 OPERANDS[0] is the destination pointer as a REG, clobbered.
2636 OPERANDS[1] is the source pointer as a REG, clobbered.
2637 OPERANDS[2] is a register for temporary storage.
2638 OPERANDS[3] is a register for temporary storage.
2639 OPERANDS[4] is the size as a CONST_INT
2640 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2641 OPERANDS[6] is another temporary register. */
2644 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2646 int align = INTVAL (operands[5]);
2647 unsigned long n_bytes = INTVAL (operands[4]);
2649 /* We can't move more than a word at a time because the PA
2650 has no longer integer move insns. (Could use fp mem ops?) */
2651 if (align > (TARGET_64BIT ? 8 : 4))
2652 align = (TARGET_64BIT ? 8 : 4);
2654 /* Note that we know each loop below will execute at least twice
2655 (else we would have open-coded the copy). */
2659 /* Pre-adjust the loop counter. */
2660 operands[4] = GEN_INT (n_bytes - 16);
2661 output_asm_insn ("ldi %4,%2", operands);
2664 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2665 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2666 output_asm_insn ("std,ma %3,8(%0)", operands);
2667 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2668 output_asm_insn ("std,ma %6,8(%0)", operands);
2670 /* Handle the residual. There could be up to 7 bytes of
2671 residual to copy! */
2672 if (n_bytes % 16 != 0)
2674 operands[4] = GEN_INT (n_bytes % 8);
2675 if (n_bytes % 16 >= 8)
2676 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2677 if (n_bytes % 8 != 0)
2678 output_asm_insn ("ldd 0(%1),%6", operands);
2679 if (n_bytes % 16 >= 8)
2680 output_asm_insn ("std,ma %3,8(%0)", operands);
2681 if (n_bytes % 8 != 0)
2682 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2687 /* Pre-adjust the loop counter. */
2688 operands[4] = GEN_INT (n_bytes - 8);
2689 output_asm_insn ("ldi %4,%2", operands);
2692 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2693 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2694 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2695 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2696 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2698 /* Handle the residual. There could be up to 7 bytes of
2699 residual to copy! */
2700 if (n_bytes % 8 != 0)
2702 operands[4] = GEN_INT (n_bytes % 4);
2703 if (n_bytes % 8 >= 4)
2704 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2705 if (n_bytes % 4 != 0)
2706 output_asm_insn ("ldw 0(%1),%6", operands);
2707 if (n_bytes % 8 >= 4)
2708 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2709 if (n_bytes % 4 != 0)
2710 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2715 /* Pre-adjust the loop counter. */
2716 operands[4] = GEN_INT (n_bytes - 4);
2717 output_asm_insn ("ldi %4,%2", operands);
2720 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2721 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2722 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2723 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2724 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2726 /* Handle the residual. */
2727 if (n_bytes % 4 != 0)
2729 if (n_bytes % 4 >= 2)
2730 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2731 if (n_bytes % 2 != 0)
2732 output_asm_insn ("ldb 0(%1),%6", operands);
2733 if (n_bytes % 4 >= 2)
2734 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2735 if (n_bytes % 2 != 0)
2736 output_asm_insn ("stb %6,0(%0)", operands);
2741 /* Pre-adjust the loop counter. */
2742 operands[4] = GEN_INT (n_bytes - 2);
2743 output_asm_insn ("ldi %4,%2", operands);
2746 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2747 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2748 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2749 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2750 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2752 /* Handle the residual. */
2753 if (n_bytes % 2 != 0)
2755 output_asm_insn ("ldb 0(%1),%3", operands);
2756 output_asm_insn ("stb %3,0(%0)", operands);
2765 /* Count the number of insns necessary to handle this block move.
2767 Basic structure is the same as emit_block_move, except that we
2768 count insns rather than emit them. */
2771 compute_movmem_length (rtx insn)
2773 rtx pat = PATTERN (insn);
2774 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2775 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2776 unsigned int n_insns = 0;
2778 /* We can't move more than four bytes at a time because the PA
2779 has no longer integer move insns. (Could use fp mem ops?) */
2780 if (align > (TARGET_64BIT ? 8 : 4))
2781 align = (TARGET_64BIT ? 8 : 4);
2783 /* The basic copying loop. */
2787 if (n_bytes % (2 * align) != 0)
2789 if ((n_bytes % (2 * align)) >= align)
2792 if ((n_bytes % align) != 0)
2796 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2800 /* Emit code to perform a block clear.
2802 OPERANDS[0] is the destination pointer as a REG, clobbered.
2803 OPERANDS[1] is a register for temporary storage.
2804 OPERANDS[2] is the size as a CONST_INT
2805 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2808 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2810 int align = INTVAL (operands[3]);
2811 unsigned long n_bytes = INTVAL (operands[2]);
2813 /* We can't clear more than a word at a time because the PA
2814 has no longer integer move insns. */
2815 if (align > (TARGET_64BIT ? 8 : 4))
2816 align = (TARGET_64BIT ? 8 : 4);
2818 /* Note that we know each loop below will execute at least twice
2819 (else we would have open-coded the copy). */
2823 /* Pre-adjust the loop counter. */
2824 operands[2] = GEN_INT (n_bytes - 16);
2825 output_asm_insn ("ldi %2,%1", operands);
2828 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2829 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2830 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2832 /* Handle the residual. There could be up to 7 bytes of
2833 residual to copy! */
2834 if (n_bytes % 16 != 0)
2836 operands[2] = GEN_INT (n_bytes % 8);
2837 if (n_bytes % 16 >= 8)
2838 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2839 if (n_bytes % 8 != 0)
2840 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2845 /* Pre-adjust the loop counter. */
2846 operands[2] = GEN_INT (n_bytes - 8);
2847 output_asm_insn ("ldi %2,%1", operands);
2850 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2851 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2852 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2854 /* Handle the residual. There could be up to 7 bytes of
2855 residual to copy! */
2856 if (n_bytes % 8 != 0)
2858 operands[2] = GEN_INT (n_bytes % 4);
2859 if (n_bytes % 8 >= 4)
2860 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2861 if (n_bytes % 4 != 0)
2862 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2867 /* Pre-adjust the loop counter. */
2868 operands[2] = GEN_INT (n_bytes - 4);
2869 output_asm_insn ("ldi %2,%1", operands);
2872 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2873 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2874 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2876 /* Handle the residual. */
2877 if (n_bytes % 4 != 0)
2879 if (n_bytes % 4 >= 2)
2880 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2881 if (n_bytes % 2 != 0)
2882 output_asm_insn ("stb %%r0,0(%0)", operands);
2887 /* Pre-adjust the loop counter. */
2888 operands[2] = GEN_INT (n_bytes - 2);
2889 output_asm_insn ("ldi %2,%1", operands);
2892 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2893 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2894 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2896 /* Handle the residual. */
2897 if (n_bytes % 2 != 0)
2898 output_asm_insn ("stb %%r0,0(%0)", operands);
2907 /* Count the number of insns necessary to handle this block move.
2909 Basic structure is the same as emit_block_move, except that we
2910 count insns rather than emit them. */
2913 compute_clrmem_length (rtx insn)
2915 rtx pat = PATTERN (insn);
2916 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2917 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2918 unsigned int n_insns = 0;
2920 /* We can't clear more than a word at a time because the PA
2921 has no longer integer move insns. */
2922 if (align > (TARGET_64BIT ? 8 : 4))
2923 align = (TARGET_64BIT ? 8 : 4);
2925 /* The basic loop. */
2929 if (n_bytes % (2 * align) != 0)
2931 if ((n_bytes % (2 * align)) >= align)
2934 if ((n_bytes % align) != 0)
2938 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2944 output_and (rtx *operands)
2946 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2948 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2949 int ls0, ls1, ms0, p, len;
2951 for (ls0 = 0; ls0 < 32; ls0++)
2952 if ((mask & (1 << ls0)) == 0)
2955 for (ls1 = ls0; ls1 < 32; ls1++)
2956 if ((mask & (1 << ls1)) != 0)
2959 for (ms0 = ls1; ms0 < 32; ms0++)
2960 if ((mask & (1 << ms0)) == 0)
2963 gcc_assert (ms0 == 32);
2971 operands[2] = GEN_INT (len);
2972 return "{extru|extrw,u} %1,31,%2,%0";
2976 /* We could use this `depi' for the case above as well, but `depi'
2977 requires one more register file access than an `extru'. */
2982 operands[2] = GEN_INT (p);
2983 operands[3] = GEN_INT (len);
2984 return "{depi|depwi} 0,%2,%3,%0";
2988 return "and %1,%2,%0";
2991 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2992 storing the result in operands[0]. */
2994 output_64bit_and (rtx *operands)
2996 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2998 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2999 int ls0, ls1, ms0, p, len;
3001 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3002 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3005 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3006 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3009 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3010 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3013 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3015 if (ls1 == HOST_BITS_PER_WIDE_INT)
3021 operands[2] = GEN_INT (len);
3022 return "extrd,u %1,63,%2,%0";
3026 /* We could use this `depi' for the case above as well, but `depi'
3027 requires one more register file access than an `extru'. */
3032 operands[2] = GEN_INT (p);
3033 operands[3] = GEN_INT (len);
3034 return "depdi 0,%2,%3,%0";
3038 return "and %1,%2,%0";
3042 output_ior (rtx *operands)
3044 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3045 int bs0, bs1, p, len;
3047 if (INTVAL (operands[2]) == 0)
3048 return "copy %1,%0";
3050 for (bs0 = 0; bs0 < 32; bs0++)
3051 if ((mask & (1 << bs0)) != 0)
3054 for (bs1 = bs0; bs1 < 32; bs1++)
3055 if ((mask & (1 << bs1)) == 0)
3058 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3063 operands[2] = GEN_INT (p);
3064 operands[3] = GEN_INT (len);
3065 return "{depi|depwi} -1,%2,%3,%0";
3068 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3069 storing the result in operands[0]. */
3071 output_64bit_ior (rtx *operands)
3073 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3074 int bs0, bs1, p, len;
3076 if (INTVAL (operands[2]) == 0)
3077 return "copy %1,%0";
3079 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3080 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3083 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3084 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3087 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3088 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3093 operands[2] = GEN_INT (p);
3094 operands[3] = GEN_INT (len);
3095 return "depdi -1,%2,%3,%0";
3098 /* Target hook for assembling integer objects. This code handles
3099 aligned SI and DI integers specially since function references
3100 must be preceded by P%. */
3103 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3105 if (size == UNITS_PER_WORD
3107 && function_label_operand (x, VOIDmode))
3109 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3110 output_addr_const (asm_out_file, x);
3111 fputc ('\n', asm_out_file);
3114 return default_assemble_integer (x, size, aligned_p);
3117 /* Output an ascii string. */
3119 output_ascii (FILE *file, const char *p, int size)
3123 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3125 /* The HP assembler can only take strings of 256 characters at one
3126 time. This is a limitation on input line length, *not* the
3127 length of the string. Sigh. Even worse, it seems that the
3128 restriction is in number of input characters (see \xnn &
3129 \whatever). So we have to do this very carefully. */
3131 fputs ("\t.STRING \"", file);
3134 for (i = 0; i < size; i += 4)
3138 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3140 register unsigned int c = (unsigned char) p[i + io];
3142 if (c == '\"' || c == '\\')
3143 partial_output[co++] = '\\';
3144 if (c >= ' ' && c < 0177)
3145 partial_output[co++] = c;
3149 partial_output[co++] = '\\';
3150 partial_output[co++] = 'x';
3151 hexd = c / 16 - 0 + '0';
3153 hexd -= '9' - 'a' + 1;
3154 partial_output[co++] = hexd;
3155 hexd = c % 16 - 0 + '0';
3157 hexd -= '9' - 'a' + 1;
3158 partial_output[co++] = hexd;
3161 if (chars_output + co > 243)
3163 fputs ("\"\n\t.STRING \"", file);
3166 fwrite (partial_output, 1, (size_t) co, file);
3170 fputs ("\"\n", file);
3173 /* Try to rewrite floating point comparisons & branches to avoid
3174 useless add,tr insns.
3176 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3177 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3178 first attempt to remove useless add,tr insns. It is zero
3179 for the second pass as reorg sometimes leaves bogus REG_DEAD
3182 When CHECK_NOTES is zero we can only eliminate add,tr insns
3183 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3186 remove_useless_addtr_insns (int check_notes)
3189 static int pass = 0;
3191 /* This is fairly cheap, so always run it when optimizing. */
3195 int fbranch_count = 0;
3197 /* Walk all the insns in this function looking for fcmp & fbranch
3198 instructions. Keep track of how many of each we find. */
3199 for (insn = get_insns (); insn; insn = next_insn (insn))
3203 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3204 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3207 tmp = PATTERN (insn);
3209 /* It must be a set. */
3210 if (GET_CODE (tmp) != SET)
3213 /* If the destination is CCFP, then we've found an fcmp insn. */
3214 tmp = SET_DEST (tmp);
3215 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3221 tmp = PATTERN (insn);
3222 /* If this is an fbranch instruction, bump the fbranch counter. */
3223 if (GET_CODE (tmp) == SET
3224 && SET_DEST (tmp) == pc_rtx
3225 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3226 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3227 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3228 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3236 /* Find all floating point compare + branch insns. If possible,
3237 reverse the comparison & the branch to avoid add,tr insns. */
3238 for (insn = get_insns (); insn; insn = next_insn (insn))
3242 /* Ignore anything that isn't an INSN. */
3243 if (GET_CODE (insn) != INSN)
3246 tmp = PATTERN (insn);
3248 /* It must be a set. */
3249 if (GET_CODE (tmp) != SET)
3252 /* The destination must be CCFP, which is register zero. */
3253 tmp = SET_DEST (tmp);
3254 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3257 /* INSN should be a set of CCFP.
3259 See if the result of this insn is used in a reversed FP
3260 conditional branch. If so, reverse our condition and
3261 the branch. Doing so avoids useless add,tr insns. */
3262 next = next_insn (insn);
3265 /* Jumps, calls and labels stop our search. */
3266 if (GET_CODE (next) == JUMP_INSN
3267 || GET_CODE (next) == CALL_INSN
3268 || GET_CODE (next) == CODE_LABEL)
3271 /* As does another fcmp insn. */
3272 if (GET_CODE (next) == INSN
3273 && GET_CODE (PATTERN (next)) == SET
3274 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3275 && REGNO (SET_DEST (PATTERN (next))) == 0)
3278 next = next_insn (next);
3281 /* Is NEXT_INSN a branch? */
3283 && GET_CODE (next) == JUMP_INSN)
3285 rtx pattern = PATTERN (next);
3287 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3288 and CCFP dies, then reverse our conditional and the branch
3289 to avoid the add,tr. */
3290 if (GET_CODE (pattern) == SET
3291 && SET_DEST (pattern) == pc_rtx
3292 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3293 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3294 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3295 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3296 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3297 && (fcmp_count == fbranch_count
3299 && find_regno_note (next, REG_DEAD, 0))))
3301 /* Reverse the branch. */
3302 tmp = XEXP (SET_SRC (pattern), 1);
3303 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3304 XEXP (SET_SRC (pattern), 2) = tmp;
3305 INSN_CODE (next) = -1;
3307 /* Reverse our condition. */
3308 tmp = PATTERN (insn);
3309 PUT_CODE (XEXP (tmp, 1),
3310 (reverse_condition_maybe_unordered
3311 (GET_CODE (XEXP (tmp, 1)))));
3321 /* You may have trouble believing this, but this is the 32 bit HP-PA
3326 Variable arguments (optional; any number may be allocated)
3328 SP-(4*(N+9)) arg word N
3333 Fixed arguments (must be allocated; may remain unused)
3342 SP-32 External Data Pointer (DP)
3344 SP-24 External/stub RP (RP')
3348 SP-8 Calling Stub RP (RP'')
3353 SP-0 Stack Pointer (points to next available address)
3357 /* This function saves registers as follows. Registers marked with ' are
3358 this function's registers (as opposed to the previous function's).
3359 If a frame_pointer isn't needed, r4 is saved as a general register;
3360 the space for the frame pointer is still allocated, though, to keep
3366 SP (FP') Previous FP
3367 SP + 4 Alignment filler (sigh)
3368 SP + 8 Space for locals reserved here.
3372 SP + n All call saved register used.
3376 SP + o All call saved fp registers used.
3380 SP + p (SP') points to next available address.
3384 /* Global variables set by output_function_prologue(). */
3385 /* Size of frame. Need to know this to emit return insns from
3387 static HOST_WIDE_INT actual_fsize, local_fsize;
3388 static int save_fregs;
3390 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3391 Handle case where DISP > 8k by using the add_high_const patterns.
3393 Note in DISP > 8k case, we will leave the high part of the address
3394 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3397 store_reg (int reg, HOST_WIDE_INT disp, int base)
3399 rtx insn, dest, src, basereg;
3401 src = gen_rtx_REG (word_mode, reg);
3402 basereg = gen_rtx_REG (Pmode, base);
3403 if (VAL_14_BITS_P (disp))
3405 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3406 insn = emit_move_insn (dest, src);
3408 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3410 rtx delta = GEN_INT (disp);
3411 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3413 emit_move_insn (tmpreg, delta);
3414 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3417 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3418 gen_rtx_SET (VOIDmode, tmpreg,
3419 gen_rtx_PLUS (Pmode, basereg, delta)));
3420 RTX_FRAME_RELATED_P (insn) = 1;
3422 dest = gen_rtx_MEM (word_mode, tmpreg);
3423 insn = emit_move_insn (dest, src);
3427 rtx delta = GEN_INT (disp);
3428 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3429 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3431 emit_move_insn (tmpreg, high);
3432 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3433 insn = emit_move_insn (dest, src);
3435 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3436 gen_rtx_SET (VOIDmode,
3437 gen_rtx_MEM (word_mode,
3438 gen_rtx_PLUS (word_mode,
3445 RTX_FRAME_RELATED_P (insn) = 1;
3448 /* Emit RTL to store REG at the memory location specified by BASE and then
3449 add MOD to BASE. MOD must be <= 8k. */
3452 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3454 rtx insn, basereg, srcreg, delta;
3456 gcc_assert (VAL_14_BITS_P (mod));
3458 basereg = gen_rtx_REG (Pmode, base);
3459 srcreg = gen_rtx_REG (word_mode, reg);
3460 delta = GEN_INT (mod);
3462 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3465 RTX_FRAME_RELATED_P (insn) = 1;
3467 /* RTX_FRAME_RELATED_P must be set on each frame related set
3468 in a parallel with more than one element. */
3469 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3470 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3474 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3475 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3476 whether to add a frame note or not.
3478 In the DISP > 8k case, we leave the high part of the address in %r1.
3479 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3482 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3486 if (VAL_14_BITS_P (disp))
3488 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3489 plus_constant (gen_rtx_REG (Pmode, base), disp));
3491 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3493 rtx basereg = gen_rtx_REG (Pmode, base);
3494 rtx delta = GEN_INT (disp);
3495 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3497 emit_move_insn (tmpreg, delta);
3498 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3499 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3501 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3502 gen_rtx_SET (VOIDmode, tmpreg,
3503 gen_rtx_PLUS (Pmode, basereg, delta)));
3507 rtx basereg = gen_rtx_REG (Pmode, base);
3508 rtx delta = GEN_INT (disp);
3509 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3511 emit_move_insn (tmpreg,
3512 gen_rtx_PLUS (Pmode, basereg,
3513 gen_rtx_HIGH (Pmode, delta)));
3514 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3515 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3518 if (DO_FRAME_NOTES && note)
3519 RTX_FRAME_RELATED_P (insn) = 1;
3523 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3528 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3529 be consistent with the rounding and size calculation done here.
3530 Change them at the same time. */
3532 /* We do our own stack alignment. First, round the size of the
3533 stack locals up to a word boundary. */
3534 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3536 /* Space for previous frame pointer + filler. If any frame is
3537 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3538 waste some space here for the sake of HP compatibility. The
3539 first slot is only used when the frame pointer is needed. */
3540 if (size || frame_pointer_needed)
3541 size += STARTING_FRAME_OFFSET;
3543 /* If the current function calls __builtin_eh_return, then we need
3544 to allocate stack space for registers that will hold data for
3545 the exception handler. */
3546 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3550 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3552 size += i * UNITS_PER_WORD;
3555 /* Account for space used by the callee general register saves. */
3556 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3557 if (df_regs_ever_live_p (i))
3558 size += UNITS_PER_WORD;
3560 /* Account for space used by the callee floating point register saves. */
3561 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3562 if (df_regs_ever_live_p (i)
3563 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3567 /* We always save both halves of the FP register, so always
3568 increment the frame size by 8 bytes. */
3572 /* If any of the floating registers are saved, account for the
3573 alignment needed for the floating point register save block. */
3576 size = (size + 7) & ~7;
3581 /* The various ABIs include space for the outgoing parameters in the
3582 size of the current function's stack frame. We don't need to align
3583 for the outgoing arguments as their alignment is set by the final
3584 rounding for the frame as a whole. */
3585 size += crtl->outgoing_args_size;
3587 /* Allocate space for the fixed frame marker. This space must be
3588 allocated for any function that makes calls or allocates
3590 if (!current_function_is_leaf || size)
3591 size += TARGET_64BIT ? 48 : 32;
3593 /* Finally, round to the preferred stack boundary. */
3594 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3595 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3598 /* Generate the assembly code for function entry. FILE is a stdio
3599 stream to output the code to. SIZE is an int: how many units of
3600 temporary storage to allocate.
3602 Refer to the array `regs_ever_live' to determine which registers to
3603 save; `regs_ever_live[I]' is nonzero if register number I is ever
3604 used in the function. This function is responsible for knowing
3605 which registers should not be saved even if used. */
3607 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3608 of memory. If any fpu reg is used in the function, we allocate
3609 such a block here, at the bottom of the frame, just in case it's needed.
3611 If this function is a leaf procedure, then we may choose not
3612 to do a "save" insn. The decision about whether or not
3613 to do this is made in regclass.c. */
3616 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3618 /* The function's label and associated .PROC must never be
3619 separated and must be output *after* any profiling declarations
3620 to avoid changing spaces/subspaces within a procedure. */
3621 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3622 fputs ("\t.PROC\n", file);
3624 /* hppa_expand_prologue does the dirty work now. We just need
3625 to output the assembler directives which denote the start
3627 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3628 if (current_function_is_leaf)
3629 fputs (",NO_CALLS", file);
3631 fputs (",CALLS", file);
3633 fputs (",SAVE_RP", file);
3635 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3636 at the beginning of the frame and that it is used as the frame
3637 pointer for the frame. We do this because our current frame
3638 layout doesn't conform to that specified in the HP runtime
3639 documentation and we need a way to indicate to programs such as
3640 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3641 isn't used by HP compilers but is supported by the assembler.
3642 However, SAVE_SP is supposed to indicate that the previous stack
3643 pointer has been saved in the frame marker. */
3644 if (frame_pointer_needed)
3645 fputs (",SAVE_SP", file);
3647 /* Pass on information about the number of callee register saves
3648 performed in the prologue.
3650 The compiler is supposed to pass the highest register number
3651 saved, the assembler then has to adjust that number before
3652 entering it into the unwind descriptor (to account for any
3653 caller saved registers with lower register numbers than the
3654 first callee saved register). */
3656 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3659 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3661 fputs ("\n\t.ENTRY\n", file);
3663 remove_useless_addtr_insns (0);
3667 hppa_expand_prologue (void)
3669 int merge_sp_adjust_with_store = 0;
3670 HOST_WIDE_INT size = get_frame_size ();
3671 HOST_WIDE_INT offset;
3679 /* Compute total size for frame pointer, filler, locals and rounding to
3680 the next word boundary. Similar code appears in compute_frame_size
3681 and must be changed in tandem with this code. */
3682 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3683 if (local_fsize || frame_pointer_needed)
3684 local_fsize += STARTING_FRAME_OFFSET;
3686 actual_fsize = compute_frame_size (size, &save_fregs);
3688 /* Compute a few things we will use often. */
3689 tmpreg = gen_rtx_REG (word_mode, 1);
3691 /* Save RP first. The calling conventions manual states RP will
3692 always be stored into the caller's frame at sp - 20 or sp - 16
3693 depending on which ABI is in use. */
3694 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3696 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3702 /* Allocate the local frame and set up the frame pointer if needed. */
3703 if (actual_fsize != 0)
3705 if (frame_pointer_needed)
3707 /* Copy the old frame pointer temporarily into %r1. Set up the
3708 new stack pointer, then store away the saved old frame pointer
3709 into the stack at sp and at the same time update the stack
3710 pointer by actual_fsize bytes. Two versions, first
3711 handles small (<8k) frames. The second handles large (>=8k)
3713 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3715 RTX_FRAME_RELATED_P (insn) = 1;
3717 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3719 RTX_FRAME_RELATED_P (insn) = 1;
3721 if (VAL_14_BITS_P (actual_fsize))
3722 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3725 /* It is incorrect to store the saved frame pointer at *sp,
3726 then increment sp (writes beyond the current stack boundary).
3728 So instead use stwm to store at *sp and post-increment the
3729 stack pointer as an atomic operation. Then increment sp to
3730 finish allocating the new frame. */
3731 HOST_WIDE_INT adjust1 = 8192 - 64;
3732 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3734 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3735 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3739 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3740 we need to store the previous stack pointer (frame pointer)
3741 into the frame marker on targets that use the HP unwind
3742 library. This allows the HP unwind library to be used to
3743 unwind GCC frames. However, we are not fully compatible
3744 with the HP library because our frame layout differs from
3745 that specified in the HP runtime specification.
3747 We don't want a frame note on this instruction as the frame
3748 marker moves during dynamic stack allocation.
3750 This instruction also serves as a blockage to prevent
3751 register spills from being scheduled before the stack
3752 pointer is raised. This is necessary as we store
3753 registers using the frame pointer as a base register,
3754 and the frame pointer is set before sp is raised. */
3755 if (TARGET_HPUX_UNWIND_LIBRARY)
3757 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3758 GEN_INT (TARGET_64BIT ? -8 : -4));
3760 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3764 emit_insn (gen_blockage ());
3766 /* no frame pointer needed. */
3769 /* In some cases we can perform the first callee register save
3770 and allocating the stack frame at the same time. If so, just
3771 make a note of it and defer allocating the frame until saving
3772 the callee registers. */
3773 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3774 merge_sp_adjust_with_store = 1;
3775 /* Can not optimize. Adjust the stack frame by actual_fsize
3778 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3783 /* Normal register save.
3785 Do not save the frame pointer in the frame_pointer_needed case. It
3786 was done earlier. */
3787 if (frame_pointer_needed)
3789 offset = local_fsize;
3791 /* Saving the EH return data registers in the frame is the simplest
3792 way to get the frame unwind information emitted. We put them
3793 just before the general registers. */
3794 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3796 unsigned int i, regno;
3800 regno = EH_RETURN_DATA_REGNO (i);
3801 if (regno == INVALID_REGNUM)
3804 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3805 offset += UNITS_PER_WORD;
3809 for (i = 18; i >= 4; i--)
3810 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3812 store_reg (i, offset, FRAME_POINTER_REGNUM);
3813 offset += UNITS_PER_WORD;
3816 /* Account for %r3 which is saved in a special place. */
3819 /* No frame pointer needed. */
3822 offset = local_fsize - actual_fsize;
3824 /* Saving the EH return data registers in the frame is the simplest
3825 way to get the frame unwind information emitted. */
3826 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3828 unsigned int i, regno;
3832 regno = EH_RETURN_DATA_REGNO (i);
3833 if (regno == INVALID_REGNUM)
3836 /* If merge_sp_adjust_with_store is nonzero, then we can
3837 optimize the first save. */
3838 if (merge_sp_adjust_with_store)
3840 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3841 merge_sp_adjust_with_store = 0;
3844 store_reg (regno, offset, STACK_POINTER_REGNUM);
3845 offset += UNITS_PER_WORD;
3849 for (i = 18; i >= 3; i--)
3850 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3852 /* If merge_sp_adjust_with_store is nonzero, then we can
3853 optimize the first GR save. */
3854 if (merge_sp_adjust_with_store)
3856 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3857 merge_sp_adjust_with_store = 0;
3860 store_reg (i, offset, STACK_POINTER_REGNUM);
3861 offset += UNITS_PER_WORD;
3865 /* If we wanted to merge the SP adjustment with a GR save, but we never
3866 did any GR saves, then just emit the adjustment here. */
3867 if (merge_sp_adjust_with_store)
3868 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3872 /* The hppa calling conventions say that %r19, the pic offset
3873 register, is saved at sp - 32 (in this function's frame)
3874 when generating PIC code. FIXME: What is the correct thing
3875 to do for functions which make no calls and allocate no
3876 frame? Do we need to allocate a frame, or can we just omit
3877 the save? For now we'll just omit the save.
3879 We don't want a note on this insn as the frame marker can
3880 move if there is a dynamic stack allocation. */
3881 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3883 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3885 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3889 /* Align pointer properly (doubleword boundary). */
3890 offset = (offset + 7) & ~7;
3892 /* Floating point register store. */
3897 /* First get the frame or stack pointer to the start of the FP register
3899 if (frame_pointer_needed)
3901 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3902 base = frame_pointer_rtx;
3906 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3907 base = stack_pointer_rtx;
3910 /* Now actually save the FP registers. */
3911 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3913 if (df_regs_ever_live_p (i)
3914 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3916 rtx addr, insn, reg;
3917 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3918 reg = gen_rtx_REG (DFmode, i);
3919 insn = emit_move_insn (addr, reg);
3922 RTX_FRAME_RELATED_P (insn) = 1;
3925 rtx mem = gen_rtx_MEM (DFmode,
3926 plus_constant (base, offset));
3927 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3928 gen_rtx_SET (VOIDmode, mem, reg));
3932 rtx meml = gen_rtx_MEM (SFmode,
3933 plus_constant (base, offset));
3934 rtx memr = gen_rtx_MEM (SFmode,
3935 plus_constant (base, offset + 4));
3936 rtx regl = gen_rtx_REG (SFmode, i);
3937 rtx regr = gen_rtx_REG (SFmode, i + 1);
3938 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3939 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3942 RTX_FRAME_RELATED_P (setl) = 1;
3943 RTX_FRAME_RELATED_P (setr) = 1;
3944 vec = gen_rtvec (2, setl, setr);
3945 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3946 gen_rtx_SEQUENCE (VOIDmode, vec));
3949 offset += GET_MODE_SIZE (DFmode);
3956 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3957 Handle case where DISP > 8k by using the add_high_const patterns. */
3960 load_reg (int reg, HOST_WIDE_INT disp, int base)
3962 rtx dest = gen_rtx_REG (word_mode, reg);
3963 rtx basereg = gen_rtx_REG (Pmode, base);
3966 if (VAL_14_BITS_P (disp))
3967 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3968 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3970 rtx delta = GEN_INT (disp);
3971 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3973 emit_move_insn (tmpreg, delta);
3974 if (TARGET_DISABLE_INDEXING)
3976 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3977 src = gen_rtx_MEM (word_mode, tmpreg);
3980 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3984 rtx delta = GEN_INT (disp);
3985 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3986 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3988 emit_move_insn (tmpreg, high);
3989 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3992 emit_move_insn (dest, src);
3995 /* Update the total code bytes output to the text section. */
3998 update_total_code_bytes (unsigned int nbytes)
4000 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4001 && !IN_NAMED_SECTION_P (cfun->decl))
4003 unsigned int old_total = total_code_bytes;
4005 total_code_bytes += nbytes;
4007 /* Be prepared to handle overflows. */
4008 if (old_total > total_code_bytes)
4009 total_code_bytes = UINT_MAX;
4013 /* This function generates the assembly code for function exit.
4014 Args are as for output_function_prologue ().
4016 The function epilogue should not depend on the current stack
4017 pointer! It should use the frame pointer only. This is mandatory
4018 because of alloca; we also take advantage of it to omit stack
4019 adjustments before returning. */
4022 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4024 rtx insn = get_last_insn ();
4028 /* hppa_expand_epilogue does the dirty work now. We just need
4029 to output the assembler directives which denote the end
4032 To make debuggers happy, emit a nop if the epilogue was completely
4033 eliminated due to a volatile call as the last insn in the
4034 current function. That way the return address (in %r2) will
4035 always point to a valid instruction in the current function. */
4037 /* Get the last real insn. */
4038 if (GET_CODE (insn) == NOTE)
4039 insn = prev_real_insn (insn);
4041 /* If it is a sequence, then look inside. */
4042 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4043 insn = XVECEXP (PATTERN (insn), 0, 0);
4045 /* If insn is a CALL_INSN, then it must be a call to a volatile
4046 function (otherwise there would be epilogue insns). */
4047 if (insn && GET_CODE (insn) == CALL_INSN)
4049 fputs ("\tnop\n", file);
4053 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4055 if (TARGET_SOM && TARGET_GAS)
4057 /* We done with this subspace except possibly for some additional
4058 debug information. Forget that we are in this subspace to ensure
4059 that the next function is output in its own subspace. */
4061 cfun->machine->in_nsubspa = 2;
4064 if (INSN_ADDRESSES_SET_P ())
4066 insn = get_last_nonnote_insn ();
4067 last_address += INSN_ADDRESSES (INSN_UID (insn));
4069 last_address += insn_default_length (insn);
4070 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4071 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4074 last_address = UINT_MAX;
4076 /* Finally, update the total number of code bytes output so far. */
4077 update_total_code_bytes (last_address);
4081 hppa_expand_epilogue (void)
4084 HOST_WIDE_INT offset;
4085 HOST_WIDE_INT ret_off = 0;
4087 int merge_sp_adjust_with_load = 0;
4089 /* We will use this often. */
4090 tmpreg = gen_rtx_REG (word_mode, 1);
4092 /* Try to restore RP early to avoid load/use interlocks when
4093 RP gets used in the return (bv) instruction. This appears to still
4094 be necessary even when we schedule the prologue and epilogue. */
4097 ret_off = TARGET_64BIT ? -16 : -20;
4098 if (frame_pointer_needed)
4100 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4105 /* No frame pointer, and stack is smaller than 8k. */
4106 if (VAL_14_BITS_P (ret_off - actual_fsize))
4108 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4114 /* General register restores. */
4115 if (frame_pointer_needed)
4117 offset = local_fsize;
4119 /* If the current function calls __builtin_eh_return, then we need
4120 to restore the saved EH data registers. */
4121 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4123 unsigned int i, regno;
4127 regno = EH_RETURN_DATA_REGNO (i);
4128 if (regno == INVALID_REGNUM)
4131 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4132 offset += UNITS_PER_WORD;
4136 for (i = 18; i >= 4; i--)
4137 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4139 load_reg (i, offset, FRAME_POINTER_REGNUM);
4140 offset += UNITS_PER_WORD;
4145 offset = local_fsize - actual_fsize;
4147 /* If the current function calls __builtin_eh_return, then we need
4148 to restore the saved EH data registers. */
4149 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4151 unsigned int i, regno;
4155 regno = EH_RETURN_DATA_REGNO (i);
4156 if (regno == INVALID_REGNUM)
4159 /* Only for the first load.
4160 merge_sp_adjust_with_load holds the register load
4161 with which we will merge the sp adjustment. */
4162 if (merge_sp_adjust_with_load == 0
4164 && VAL_14_BITS_P (-actual_fsize))
4165 merge_sp_adjust_with_load = regno;
4167 load_reg (regno, offset, STACK_POINTER_REGNUM);
4168 offset += UNITS_PER_WORD;
4172 for (i = 18; i >= 3; i--)
4174 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4176 /* Only for the first load.
4177 merge_sp_adjust_with_load holds the register load
4178 with which we will merge the sp adjustment. */
4179 if (merge_sp_adjust_with_load == 0
4181 && VAL_14_BITS_P (-actual_fsize))
4182 merge_sp_adjust_with_load = i;
4184 load_reg (i, offset, STACK_POINTER_REGNUM);
4185 offset += UNITS_PER_WORD;
4190 /* Align pointer properly (doubleword boundary). */
4191 offset = (offset + 7) & ~7;
4193 /* FP register restores. */
4196 /* Adjust the register to index off of. */
4197 if (frame_pointer_needed)
4198 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4200 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4202 /* Actually do the restores now. */
4203 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4204 if (df_regs_ever_live_p (i)
4205 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4207 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4208 rtx dest = gen_rtx_REG (DFmode, i);
4209 emit_move_insn (dest, src);
4213 /* Emit a blockage insn here to keep these insns from being moved to
4214 an earlier spot in the epilogue, or into the main instruction stream.
4216 This is necessary as we must not cut the stack back before all the
4217 restores are finished. */
4218 emit_insn (gen_blockage ());
4220 /* Reset stack pointer (and possibly frame pointer). The stack
4221 pointer is initially set to fp + 64 to avoid a race condition. */
4222 if (frame_pointer_needed)
4224 rtx delta = GEN_INT (-64);
4226 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4227 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4229 /* If we were deferring a callee register restore, do it now. */
4230 else if (merge_sp_adjust_with_load)
4232 rtx delta = GEN_INT (-actual_fsize);
4233 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4235 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4237 else if (actual_fsize != 0)
4238 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4241 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4242 frame greater than 8k), do so now. */
4244 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4246 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4248 rtx sa = EH_RETURN_STACKADJ_RTX;
4250 emit_insn (gen_blockage ());
4251 emit_insn (TARGET_64BIT
4252 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4253 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4258 hppa_pic_save_rtx (void)
4260 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4263 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4264 #define NO_DEFERRED_PROFILE_COUNTERS 0
4268 /* Vector of funcdef numbers. */
4269 static VEC(int,heap) *funcdef_nos;
4271 /* Output deferred profile counters. */
4273 output_deferred_profile_counters (void)
4278 if (VEC_empty (int, funcdef_nos))
4281 switch_to_section (data_section);
4282 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4283 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4285 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4287 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4288 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4291 VEC_free (int, heap, funcdef_nos);
4295 hppa_profile_hook (int label_no)
4297 /* We use SImode for the address of the function in both 32 and
4298 64-bit code to avoid having to provide DImode versions of the
4299 lcla2 and load_offset_label_address insn patterns. */
4300 rtx reg = gen_reg_rtx (SImode);
4301 rtx label_rtx = gen_label_rtx ();
4302 rtx begin_label_rtx, call_insn;
4303 char begin_label_name[16];
4305 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4307 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4310 emit_move_insn (arg_pointer_rtx,
4311 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4314 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4316 /* The address of the function is loaded into %r25 with an instruction-
4317 relative sequence that avoids the use of relocations. The sequence
4318 is split so that the load_offset_label_address instruction can
4319 occupy the delay slot of the call to _mcount. */
4321 emit_insn (gen_lcla2 (reg, label_rtx));
4323 emit_insn (gen_lcla1 (reg, label_rtx));
4325 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4326 reg, begin_label_rtx, label_rtx));
4328 #if !NO_DEFERRED_PROFILE_COUNTERS
4330 rtx count_label_rtx, addr, r24;
4331 char count_label_name[16];
4333 VEC_safe_push (int, heap, funcdef_nos, label_no);
4334 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4335 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4337 addr = force_reg (Pmode, count_label_rtx);
4338 r24 = gen_rtx_REG (Pmode, 24);
4339 emit_move_insn (r24, addr);
4342 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4343 gen_rtx_SYMBOL_REF (Pmode,
4345 GEN_INT (TARGET_64BIT ? 24 : 12)));
4347 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4352 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4353 gen_rtx_SYMBOL_REF (Pmode,
4355 GEN_INT (TARGET_64BIT ? 16 : 8)));
4359 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4360 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4362 /* Indicate the _mcount call cannot throw, nor will it execute a
4364 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4367 /* Fetch the return address for the frame COUNT steps up from
4368 the current frame, after the prologue. FRAMEADDR is the
4369 frame pointer of the COUNT frame.
4371 We want to ignore any export stub remnants here. To handle this,
4372 we examine the code at the return address, and if it is an export
4373 stub, we return a memory rtx for the stub return address stored
4376 The value returned is used in two different ways:
4378 1. To find a function's caller.
4380 2. To change the return address for a function.
4382 This function handles most instances of case 1; however, it will
4383 fail if there are two levels of stubs to execute on the return
4384 path. The only way I believe that can happen is if the return value
4385 needs a parameter relocation, which never happens for C code.
4387 This function handles most instances of case 2; however, it will
4388 fail if we did not originally have stub code on the return path
4389 but will need stub code on the new return path. This can happen if
4390 the caller & callee are both in the main program, but the new
4391 return location is in a shared library. */
4394 return_addr_rtx (int count, rtx frameaddr)
4401 /* Instruction stream at the normal return address for the export stub:
4403 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4404 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4405 0x00011820 | stub+16: mtsp r1,sr0
4406 0xe0400002 | stub+20: be,n 0(sr0,rp)
4408 0xe0400002 must be specified as -532676606 so that it won't be
4409 rejected as an invalid immediate operand on 64-bit hosts. */
4411 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4417 rp = get_hard_reg_initial_val (Pmode, 2);
4419 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4422 /* If there is no export stub then just use the value saved from
4423 the return pointer register. */
4425 saved_rp = gen_reg_rtx (Pmode);
4426 emit_move_insn (saved_rp, rp);
4428 /* Get pointer to the instruction stream. We have to mask out the
4429 privilege level from the two low order bits of the return address
4430 pointer here so that ins will point to the start of the first
4431 instruction that would have been executed if we returned. */
4432 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4433 label = gen_label_rtx ();
4435 /* Check the instruction stream at the normal return address for the
4436 export stub. If it is an export stub, than our return address is
4437 really in -24[frameaddr]. */
4439 for (i = 0; i < 3; i++)
4441 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4442 rtx op1 = GEN_INT (insns[i]);
4443 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4446 /* Here we know that our return address points to an export
4447 stub. We don't want to return the address of the export stub,
4448 but rather the return address of the export stub. That return
4449 address is stored at -24[frameaddr]. */
4451 emit_move_insn (saved_rp,
4453 memory_address (Pmode,
4454 plus_constant (frameaddr,
4463 emit_bcond_fp (rtx operands[])
4465 enum rtx_code code = GET_CODE (operands[0]);
4466 rtx operand0 = operands[1];
4467 rtx operand1 = operands[2];
4468 rtx label = operands[3];
4470 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4471 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4473 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4474 gen_rtx_IF_THEN_ELSE (VOIDmode,
4477 gen_rtx_REG (CCFPmode, 0),
4479 gen_rtx_LABEL_REF (VOIDmode, label),
4484 /* Adjust the cost of a scheduling dependency. Return the new cost of
4485 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4488 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4490 enum attr_type attr_type;
4492 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4493 true dependencies as they are described with bypasses now. */
4494 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4497 if (! recog_memoized (insn))
4500 attr_type = get_attr_type (insn);
4502 switch (REG_NOTE_KIND (link))
4505 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4508 if (attr_type == TYPE_FPLOAD)
4510 rtx pat = PATTERN (insn);
4511 rtx dep_pat = PATTERN (dep_insn);
4512 if (GET_CODE (pat) == PARALLEL)
4514 /* This happens for the fldXs,mb patterns. */
4515 pat = XVECEXP (pat, 0, 0);
4517 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4518 /* If this happens, we have to extend this to schedule
4519 optimally. Return 0 for now. */
4522 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4524 if (! recog_memoized (dep_insn))
4526 switch (get_attr_type (dep_insn))
4533 case TYPE_FPSQRTSGL:
4534 case TYPE_FPSQRTDBL:
4535 /* A fpload can't be issued until one cycle before a
4536 preceding arithmetic operation has finished if
4537 the target of the fpload is any of the sources
4538 (or destination) of the arithmetic operation. */
4539 return insn_default_latency (dep_insn) - 1;
4546 else if (attr_type == TYPE_FPALU)
4548 rtx pat = PATTERN (insn);
4549 rtx dep_pat = PATTERN (dep_insn);
4550 if (GET_CODE (pat) == PARALLEL)
4552 /* This happens for the fldXs,mb patterns. */
4553 pat = XVECEXP (pat, 0, 0);
4555 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4556 /* If this happens, we have to extend this to schedule
4557 optimally. Return 0 for now. */
4560 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4562 if (! recog_memoized (dep_insn))
4564 switch (get_attr_type (dep_insn))
4568 case TYPE_FPSQRTSGL:
4569 case TYPE_FPSQRTDBL:
4570 /* An ALU flop can't be issued until two cycles before a
4571 preceding divide or sqrt operation has finished if
4572 the target of the ALU flop is any of the sources
4573 (or destination) of the divide or sqrt operation. */
4574 return insn_default_latency (dep_insn) - 2;
4582 /* For other anti dependencies, the cost is 0. */
4585 case REG_DEP_OUTPUT:
4586 /* Output dependency; DEP_INSN writes a register that INSN writes some
4588 if (attr_type == TYPE_FPLOAD)
4590 rtx pat = PATTERN (insn);
4591 rtx dep_pat = PATTERN (dep_insn);
4592 if (GET_CODE (pat) == PARALLEL)
4594 /* This happens for the fldXs,mb patterns. */
4595 pat = XVECEXP (pat, 0, 0);
4597 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4598 /* If this happens, we have to extend this to schedule
4599 optimally. Return 0 for now. */
4602 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4604 if (! recog_memoized (dep_insn))
4606 switch (get_attr_type (dep_insn))
4613 case TYPE_FPSQRTSGL:
4614 case TYPE_FPSQRTDBL:
4615 /* A fpload can't be issued until one cycle before a
4616 preceding arithmetic operation has finished if
4617 the target of the fpload is the destination of the
4618 arithmetic operation.
4620 Exception: For PA7100LC, PA7200 and PA7300, the cost
4621 is 3 cycles, unless they bundle together. We also
4622 pay the penalty if the second insn is a fpload. */
4623 return insn_default_latency (dep_insn) - 1;
4630 else if (attr_type == TYPE_FPALU)
4632 rtx pat = PATTERN (insn);
4633 rtx dep_pat = PATTERN (dep_insn);
4634 if (GET_CODE (pat) == PARALLEL)
4636 /* This happens for the fldXs,mb patterns. */
4637 pat = XVECEXP (pat, 0, 0);
4639 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4640 /* If this happens, we have to extend this to schedule
4641 optimally. Return 0 for now. */
4644 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4646 if (! recog_memoized (dep_insn))
4648 switch (get_attr_type (dep_insn))
4652 case TYPE_FPSQRTSGL:
4653 case TYPE_FPSQRTDBL:
4654 /* An ALU flop can't be issued until two cycles before a
4655 preceding divide or sqrt operation has finished if
4656 the target of the ALU flop is also the target of
4657 the divide or sqrt operation. */
4658 return insn_default_latency (dep_insn) - 2;
4666 /* For other output dependencies, the cost is 0. */
4674 /* Adjust scheduling priorities. We use this to try and keep addil
4675 and the next use of %r1 close together. */
4677 pa_adjust_priority (rtx insn, int priority)
4679 rtx set = single_set (insn);
4683 src = SET_SRC (set);
4684 dest = SET_DEST (set);
4685 if (GET_CODE (src) == LO_SUM
4686 && symbolic_operand (XEXP (src, 1), VOIDmode)
4687 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4690 else if (GET_CODE (src) == MEM
4691 && GET_CODE (XEXP (src, 0)) == LO_SUM
4692 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4693 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4696 else if (GET_CODE (dest) == MEM
4697 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4698 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4699 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4705 /* The 700 can only issue a single insn at a time.
4706 The 7XXX processors can issue two insns at a time.
4707 The 8000 can issue 4 insns at a time. */
4709 pa_issue_rate (void)
4713 case PROCESSOR_700: return 1;
4714 case PROCESSOR_7100: return 2;
4715 case PROCESSOR_7100LC: return 2;
4716 case PROCESSOR_7200: return 2;
4717 case PROCESSOR_7300: return 2;
4718 case PROCESSOR_8000: return 4;
4727 /* Return any length adjustment needed by INSN which already has its length
4728 computed as LENGTH. Return zero if no adjustment is necessary.
4730 For the PA: function calls, millicode calls, and backwards short
4731 conditional branches with unfilled delay slots need an adjustment by +1
4732 (to account for the NOP which will be inserted into the instruction stream).
4734 Also compute the length of an inline block move here as it is too
4735 complicated to express as a length attribute in pa.md. */
4737 pa_adjust_insn_length (rtx insn, int length)
4739 rtx pat = PATTERN (insn);
4741 /* Jumps inside switch tables which have unfilled delay slots need
4743 if (GET_CODE (insn) == JUMP_INSN
4744 && GET_CODE (pat) == PARALLEL
4745 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4747 /* Millicode insn with an unfilled delay slot. */
4748 else if (GET_CODE (insn) == INSN
4749 && GET_CODE (pat) != SEQUENCE
4750 && GET_CODE (pat) != USE
4751 && GET_CODE (pat) != CLOBBER
4752 && get_attr_type (insn) == TYPE_MILLI)
4754 /* Block move pattern. */
4755 else if (GET_CODE (insn) == INSN
4756 && GET_CODE (pat) == PARALLEL
4757 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4758 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4759 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4760 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4761 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4762 return compute_movmem_length (insn) - 4;
4763 /* Block clear pattern. */
4764 else if (GET_CODE (insn) == INSN
4765 && GET_CODE (pat) == PARALLEL
4766 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4767 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4768 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4769 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4770 return compute_clrmem_length (insn) - 4;
4771 /* Conditional branch with an unfilled delay slot. */
4772 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4774 /* Adjust a short backwards conditional with an unfilled delay slot. */
4775 if (GET_CODE (pat) == SET
4777 && JUMP_LABEL (insn) != NULL_RTX
4778 && ! forward_branch_p (insn))
4780 else if (GET_CODE (pat) == PARALLEL
4781 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4784 /* Adjust dbra insn with short backwards conditional branch with
4785 unfilled delay slot -- only for case where counter is in a
4786 general register register. */
4787 else if (GET_CODE (pat) == PARALLEL
4788 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4789 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4790 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4792 && ! forward_branch_p (insn))
4800 /* Print operand X (an rtx) in assembler syntax to file FILE.
4801 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4802 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4805 print_operand (FILE *file, rtx x, int code)
4810 /* Output a 'nop' if there's nothing for the delay slot. */
4811 if (dbr_sequence_length () == 0)
4812 fputs ("\n\tnop", file);
4815 /* Output a nullification completer if there's nothing for the */
4816 /* delay slot or nullification is requested. */
4817 if (dbr_sequence_length () == 0 ||
4819 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4823 /* Print out the second register name of a register pair.
4824 I.e., R (6) => 7. */
4825 fputs (reg_names[REGNO (x) + 1], file);
4828 /* A register or zero. */
4830 || (x == CONST0_RTX (DFmode))
4831 || (x == CONST0_RTX (SFmode)))
4833 fputs ("%r0", file);
4839 /* A register or zero (floating point). */
4841 || (x == CONST0_RTX (DFmode))
4842 || (x == CONST0_RTX (SFmode)))
4844 fputs ("%fr0", file);
4853 xoperands[0] = XEXP (XEXP (x, 0), 0);
4854 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4855 output_global_address (file, xoperands[1], 0);
4856 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4860 case 'C': /* Plain (C)ondition */
4862 switch (GET_CODE (x))
4865 fputs ("=", file); break;
4867 fputs ("<>", file); break;
4869 fputs (">", file); break;
4871 fputs (">=", file); break;
4873 fputs (">>=", file); break;
4875 fputs (">>", file); break;
4877 fputs ("<", file); break;
4879 fputs ("<=", file); break;
4881 fputs ("<<=", file); break;
4883 fputs ("<<", file); break;
4888 case 'N': /* Condition, (N)egated */
4889 switch (GET_CODE (x))
4892 fputs ("<>", file); break;
4894 fputs ("=", file); break;
4896 fputs ("<=", file); break;
4898 fputs ("<", file); break;
4900 fputs ("<<", file); break;
4902 fputs ("<<=", file); break;
4904 fputs (">=", file); break;
4906 fputs (">", file); break;
4908 fputs (">>", file); break;
4910 fputs (">>=", file); break;
4915 /* For floating point comparisons. Note that the output
4916 predicates are the complement of the desired mode. The
4917 conditions for GT, GE, LT, LE and LTGT cause an invalid
4918 operation exception if the result is unordered and this
4919 exception is enabled in the floating-point status register. */
4921 switch (GET_CODE (x))
4924 fputs ("!=", file); break;
4926 fputs ("=", file); break;
4928 fputs ("!>", file); break;
4930 fputs ("!>=", file); break;
4932 fputs ("!<", file); break;
4934 fputs ("!<=", file); break;
4936 fputs ("!<>", file); break;
4938 fputs ("!?<=", file); break;
4940 fputs ("!?<", file); break;
4942 fputs ("!?>=", file); break;
4944 fputs ("!?>", file); break;
4946 fputs ("!?=", file); break;
4948 fputs ("!?", file); break;
4950 fputs ("?", file); break;
4955 case 'S': /* Condition, operands are (S)wapped. */
4956 switch (GET_CODE (x))
4959 fputs ("=", file); break;
4961 fputs ("<>", file); break;
4963 fputs ("<", file); break;
4965 fputs ("<=", file); break;
4967 fputs ("<<=", file); break;
4969 fputs ("<<", file); break;
4971 fputs (">", file); break;
4973 fputs (">=", file); break;
4975 fputs (">>=", file); break;
4977 fputs (">>", file); break;
4982 case 'B': /* Condition, (B)oth swapped and negate. */
4983 switch (GET_CODE (x))
4986 fputs ("<>", file); break;
4988 fputs ("=", file); break;
4990 fputs (">=", file); break;
4992 fputs (">", file); break;
4994 fputs (">>", file); break;
4996 fputs (">>=", file); break;
4998 fputs ("<=", file); break;
5000 fputs ("<", file); break;
5002 fputs ("<<", file); break;
5004 fputs ("<<=", file); break;
5010 gcc_assert (GET_CODE (x) == CONST_INT);
5011 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5014 gcc_assert (GET_CODE (x) == CONST_INT);
5015 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5018 gcc_assert (GET_CODE (x) == CONST_INT);
5019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5022 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5023 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5026 gcc_assert (GET_CODE (x) == CONST_INT);
5027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5030 gcc_assert (GET_CODE (x) == CONST_INT);
5031 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5034 if (GET_CODE (x) == CONST_INT)
5039 switch (GET_CODE (XEXP (x, 0)))
5043 if (ASSEMBLER_DIALECT == 0)
5044 fputs ("s,mb", file);
5046 fputs (",mb", file);
5050 if (ASSEMBLER_DIALECT == 0)
5051 fputs ("s,ma", file);
5053 fputs (",ma", file);
5056 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5059 if (ASSEMBLER_DIALECT == 0)
5062 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5063 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5065 if (ASSEMBLER_DIALECT == 0)
5066 fputs ("x,s", file);
5070 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5074 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5080 output_global_address (file, x, 0);
5083 output_global_address (file, x, 1);
5085 case 0: /* Don't do anything special */
5090 compute_zdepwi_operands (INTVAL (x), op);
5091 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5097 compute_zdepdi_operands (INTVAL (x), op);
5098 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5102 /* We can get here from a .vtable_inherit due to our
5103 CONSTANT_ADDRESS_P rejecting perfectly good constant
5109 if (GET_CODE (x) == REG)
5111 fputs (reg_names [REGNO (x)], file);
5112 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5118 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5119 && (REGNO (x) & 1) == 0)
5122 else if (GET_CODE (x) == MEM)
5124 int size = GET_MODE_SIZE (GET_MODE (x));
5125 rtx base = NULL_RTX;
5126 switch (GET_CODE (XEXP (x, 0)))
5130 base = XEXP (XEXP (x, 0), 0);
5131 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5135 base = XEXP (XEXP (x, 0), 0);
5136 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5139 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5140 fprintf (file, "%s(%s)",
5141 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5142 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5143 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5144 fprintf (file, "%s(%s)",
5145 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5146 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5147 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5148 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5150 /* Because the REG_POINTER flag can get lost during reload,
5151 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5152 index and base registers in the combined move patterns. */
5153 rtx base = XEXP (XEXP (x, 0), 1);
5154 rtx index = XEXP (XEXP (x, 0), 0);
5156 fprintf (file, "%s(%s)",
5157 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5160 output_address (XEXP (x, 0));
5163 output_address (XEXP (x, 0));
5168 output_addr_const (file, x);
5171 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5174 output_global_address (FILE *file, rtx x, int round_constant)
5177 /* Imagine (high (const (plus ...))). */
5178 if (GET_CODE (x) == HIGH)
5181 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5182 output_addr_const (file, x);
5183 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5185 output_addr_const (file, x);
5186 fputs ("-$global$", file);
5188 else if (GET_CODE (x) == CONST)
5190 const char *sep = "";
5191 int offset = 0; /* assembler wants -$global$ at end */
5192 rtx base = NULL_RTX;
5194 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5197 base = XEXP (XEXP (x, 0), 0);
5198 output_addr_const (file, base);
5201 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5207 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5210 base = XEXP (XEXP (x, 0), 1);
5211 output_addr_const (file, base);
5214 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5220 /* How bogus. The compiler is apparently responsible for
5221 rounding the constant if it uses an LR field selector.
5223 The linker and/or assembler seem a better place since
5224 they have to do this kind of thing already.
5226 If we fail to do this, HP's optimizing linker may eliminate
5227 an addil, but not update the ldw/stw/ldo instruction that
5228 uses the result of the addil. */
5230 offset = ((offset + 0x1000) & ~0x1fff);
5232 switch (GET_CODE (XEXP (x, 0)))
5245 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5253 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5254 fputs ("-$global$", file);
5256 fprintf (file, "%s%d", sep, offset);
5259 output_addr_const (file, x);
5262 /* Output boilerplate text to appear at the beginning of the file.
5263 There are several possible versions. */
5264 #define aputs(x) fputs(x, asm_out_file)
5266 pa_file_start_level (void)
5269 aputs ("\t.LEVEL 2.0w\n");
5270 else if (TARGET_PA_20)
5271 aputs ("\t.LEVEL 2.0\n");
5272 else if (TARGET_PA_11)
5273 aputs ("\t.LEVEL 1.1\n");
5275 aputs ("\t.LEVEL 1.0\n");
5279 pa_file_start_space (int sortspace)
5281 aputs ("\t.SPACE $PRIVATE$");
5284 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5285 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5286 "\n\t.SPACE $TEXT$");
5289 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5290 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5294 pa_file_start_file (int want_version)
5296 if (write_symbols != NO_DEBUG)
5298 output_file_directive (asm_out_file, main_input_filename);
5300 aputs ("\t.version\t\"01.01\"\n");
5305 pa_file_start_mcount (const char *aswhat)
5308 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5312 pa_elf_file_start (void)
5314 pa_file_start_level ();
5315 pa_file_start_mcount ("ENTRY");
5316 pa_file_start_file (0);
5320 pa_som_file_start (void)
5322 pa_file_start_level ();
5323 pa_file_start_space (0);
5324 aputs ("\t.IMPORT $global$,DATA\n"
5325 "\t.IMPORT $$dyncall,MILLICODE\n");
5326 pa_file_start_mcount ("CODE");
5327 pa_file_start_file (0);
5331 pa_linux_file_start (void)
5333 pa_file_start_file (1);
5334 pa_file_start_level ();
5335 pa_file_start_mcount ("CODE");
5339 pa_hpux64_gas_file_start (void)
5341 pa_file_start_level ();
5342 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5344 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5346 pa_file_start_file (1);
5350 pa_hpux64_hpas_file_start (void)
5352 pa_file_start_level ();
5353 pa_file_start_space (1);
5354 pa_file_start_mcount ("CODE");
5355 pa_file_start_file (0);
5359 /* Search the deferred plabel list for SYMBOL and return its internal
5360 label. If an entry for SYMBOL is not found, a new entry is created. */
5363 get_deferred_plabel (rtx symbol)
5365 const char *fname = XSTR (symbol, 0);
5368 /* See if we have already put this function on the list of deferred
5369 plabels. This list is generally small, so a liner search is not
5370 too ugly. If it proves too slow replace it with something faster. */
5371 for (i = 0; i < n_deferred_plabels; i++)
5372 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5375 /* If the deferred plabel list is empty, or this entry was not found
5376 on the list, create a new entry on the list. */
5377 if (deferred_plabels == NULL || i == n_deferred_plabels)
5381 if (deferred_plabels == 0)
5382 deferred_plabels = (struct deferred_plabel *)
5383 ggc_alloc (sizeof (struct deferred_plabel));
5385 deferred_plabels = (struct deferred_plabel *)
5386 ggc_realloc (deferred_plabels,
5387 ((n_deferred_plabels + 1)
5388 * sizeof (struct deferred_plabel)));
5390 i = n_deferred_plabels++;
5391 deferred_plabels[i].internal_label = gen_label_rtx ();
5392 deferred_plabels[i].symbol = symbol;
5394 /* Gross. We have just implicitly taken the address of this
5395 function. Mark it in the same manner as assemble_name. */
5396 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5398 mark_referenced (id);
5401 return deferred_plabels[i].internal_label;
5405 output_deferred_plabels (void)
5409 /* If we have some deferred plabels, then we need to switch into the
5410 data or readonly data section, and align it to a 4 byte boundary
5411 before outputting the deferred plabels. */
5412 if (n_deferred_plabels)
5414 switch_to_section (flag_pic ? data_section : readonly_data_section);
5415 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5418 /* Now output the deferred plabels. */
5419 for (i = 0; i < n_deferred_plabels; i++)
5421 targetm.asm_out.internal_label (asm_out_file, "L",
5422 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5423 assemble_integer (deferred_plabels[i].symbol,
5424 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5428 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5429 /* Initialize optabs to point to HPUX long double emulation routines. */
5431 pa_hpux_init_libfuncs (void)
5433 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5434 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5435 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5436 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5437 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5438 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5439 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5440 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5441 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5443 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5444 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5445 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5446 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5447 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5448 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5449 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5451 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5452 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5453 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5454 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5456 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5457 ? "__U_Qfcnvfxt_quad_to_sgl"
5458 : "_U_Qfcnvfxt_quad_to_sgl");
5459 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5460 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5461 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5463 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5464 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5465 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5466 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5470 /* HP's millicode routines mean something special to the assembler.
5471 Keep track of which ones we have used. */
5473 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5474 static void import_milli (enum millicodes);
5475 static char imported[(int) end1000];
5476 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5477 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5478 #define MILLI_START 10
5481 import_milli (enum millicodes code)
5483 char str[sizeof (import_string)];
5485 if (!imported[(int) code])
5487 imported[(int) code] = 1;
5488 strcpy (str, import_string);
5489 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5490 output_asm_insn (str, 0);
5494 /* The register constraints have put the operands and return value in
5495 the proper registers. */
5498 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5500 import_milli (mulI);
5501 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5504 /* Emit the rtl for doing a division by a constant. */
5506 /* Do magic division millicodes exist for this value? */
5507 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5509 /* We'll use an array to keep track of the magic millicodes and
5510 whether or not we've used them already. [n][0] is signed, [n][1] is
5513 static int div_milli[16][2];
5516 emit_hpdiv_const (rtx *operands, int unsignedp)
5518 if (GET_CODE (operands[2]) == CONST_INT
5519 && INTVAL (operands[2]) > 0
5520 && INTVAL (operands[2]) < 16
5521 && magic_milli[INTVAL (operands[2])])
5523 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5525 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5529 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5530 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5532 gen_rtx_REG (SImode, 26),
5534 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5535 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5536 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5538 gen_rtx_CLOBBER (VOIDmode, ret))));
5539 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5546 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5550 /* If the divisor is a constant, try to use one of the special
5552 if (GET_CODE (operands[0]) == CONST_INT)
5554 static char buf[100];
5555 divisor = INTVAL (operands[0]);
5556 if (!div_milli[divisor][unsignedp])
5558 div_milli[divisor][unsignedp] = 1;
5560 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5562 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5566 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5567 INTVAL (operands[0]));
5568 return output_millicode_call (insn,
5569 gen_rtx_SYMBOL_REF (SImode, buf));
5573 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5574 INTVAL (operands[0]));
5575 return output_millicode_call (insn,
5576 gen_rtx_SYMBOL_REF (SImode, buf));
5579 /* Divisor isn't a special constant. */
5584 import_milli (divU);
5585 return output_millicode_call (insn,
5586 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5590 import_milli (divI);
5591 return output_millicode_call (insn,
5592 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5597 /* Output a $$rem millicode to do mod. */
5600 output_mod_insn (int unsignedp, rtx insn)
5604 import_milli (remU);
5605 return output_millicode_call (insn,
5606 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5610 import_milli (remI);
5611 return output_millicode_call (insn,
5612 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5617 output_arg_descriptor (rtx call_insn)
5619 const char *arg_regs[4];
5620 enum machine_mode arg_mode;
5622 int i, output_flag = 0;
5625 /* We neither need nor want argument location descriptors for the
5626 64bit runtime environment or the ELF32 environment. */
5627 if (TARGET_64BIT || TARGET_ELF32)
5630 for (i = 0; i < 4; i++)
5633 /* Specify explicitly that no argument relocations should take place
5634 if using the portable runtime calling conventions. */
5635 if (TARGET_PORTABLE_RUNTIME)
5637 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5642 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5643 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5644 link; link = XEXP (link, 1))
5646 rtx use = XEXP (link, 0);
5648 if (! (GET_CODE (use) == USE
5649 && GET_CODE (XEXP (use, 0)) == REG
5650 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5653 arg_mode = GET_MODE (XEXP (use, 0));
5654 regno = REGNO (XEXP (use, 0));
5655 if (regno >= 23 && regno <= 26)
5657 arg_regs[26 - regno] = "GR";
5658 if (arg_mode == DImode)
5659 arg_regs[25 - regno] = "GR";
5661 else if (regno >= 32 && regno <= 39)
5663 if (arg_mode == SFmode)
5664 arg_regs[(regno - 32) / 2] = "FR";
5667 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5668 arg_regs[(regno - 34) / 2] = "FR";
5669 arg_regs[(regno - 34) / 2 + 1] = "FU";
5671 arg_regs[(regno - 34) / 2] = "FU";
5672 arg_regs[(regno - 34) / 2 + 1] = "FR";
5677 fputs ("\t.CALL ", asm_out_file);
5678 for (i = 0; i < 4; i++)
5683 fputc (',', asm_out_file);
5684 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5687 fputc ('\n', asm_out_file);
5690 static enum reg_class
5691 pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
5692 enum machine_mode mode, secondary_reload_info *sri)
5694 int is_symbolic, regno;
5696 /* Handle the easy stuff first. */
5697 if (rclass == R1_REGS)
5703 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5709 /* If we have something like (mem (mem (...)), we can safely assume the
5710 inner MEM will end up in a general register after reloading, so there's
5711 no need for a secondary reload. */
5712 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5715 /* Trying to load a constant into a FP register during PIC code
5716 generation requires %r1 as a scratch register. */
5718 && (mode == SImode || mode == DImode)
5719 && FP_REG_CLASS_P (rclass)
5720 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5722 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5723 : CODE_FOR_reload_indi_r1);
5727 /* Profiling showed the PA port spends about 1.3% of its compilation
5728 time in true_regnum from calls inside pa_secondary_reload_class. */
5729 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5730 regno = true_regnum (x);
5732 /* In order to allow 14-bit displacements in integer loads and stores,
5733 we need to prevent reload from generating out of range integer mode
5734 loads and stores to the floating point registers. Previously, we
5735 used to call for a secondary reload and have emit_move_sequence()
5736 fix the instruction sequence. However, reload occasionally wouldn't
5737 generate the reload and we would end up with an invalid REG+D memory
5738 address. So, now we use an intermediate general register for most
5739 memory loads and stores. */
5740 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5741 && GET_MODE_CLASS (mode) == MODE_INT
5742 && FP_REG_CLASS_P (rclass))
5744 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5745 the secondary reload needed for a pseudo. It never passes a
5747 if (GET_CODE (x) == MEM)
5751 /* We don't need an intermediate for indexed and LO_SUM DLT
5752 memory addresses. When INT14_OK_STRICT is true, it might
5753 appear that we could directly allow register indirect
5754 memory addresses. However, this doesn't work because we
5755 don't support SUBREGs in floating-point register copies
5756 and reload doesn't tell us when it's going to use a SUBREG. */
5757 if (IS_INDEX_ADDR_P (x)
5758 || IS_LO_SUM_DLT_ADDR_P (x))
5761 /* Otherwise, we need an intermediate general register. */
5762 return GENERAL_REGS;
5765 /* Request a secondary reload with a general scratch register
5766 for everthing else. ??? Could symbolic operands be handled
5767 directly when generating non-pic PA 2.0 code? */
5768 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5772 /* We need a secondary register (GPR) for copies between the SAR
5773 and anything other than a general register. */
5774 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5776 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5780 /* A SAR<->FP register copy requires a secondary register (GPR) as
5781 well as secondary memory. */
5782 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5783 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5784 && FP_REG_CLASS_P (rclass)))
5786 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5790 /* Secondary reloads of symbolic operands require %r1 as a scratch
5791 register when we're generating PIC code and when the operand isn't
5793 if (GET_CODE (x) == HIGH)
5796 /* Profiling has showed GCC spends about 2.6% of its compilation
5797 time in symbolic_operand from calls inside pa_secondary_reload_class.
5798 So, we use an inline copy to avoid useless work. */
5799 switch (GET_CODE (x))
5804 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5811 is_symbolic = (GET_CODE (op) == PLUS
5812 && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5813 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5814 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5815 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5822 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5824 gcc_assert (mode == SImode || mode == DImode);
5825 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5826 : CODE_FOR_reload_indi_r1);
5832 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5833 is only marked as live on entry by df-scan when it is a fixed
5834 register. It isn't a fixed register in the 64-bit runtime,
5835 so we need to mark it here. */
5838 pa_extra_live_on_entry (bitmap regs)
5841 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5844 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5845 to prevent it from being deleted. */
5848 pa_eh_return_handler_rtx (void)
5852 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5853 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5854 tmp = gen_rtx_MEM (word_mode, tmp);
5859 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5860 by invisible reference. As a GCC extension, we also pass anything
5861 with a zero or variable size by reference.
5863 The 64-bit runtime does not describe passing any types by invisible
5864 reference. The internals of GCC can't currently handle passing
5865 empty structures, and zero or variable length arrays when they are
5866 not passed entirely on the stack or by reference. Thus, as a GCC
5867 extension, we pass these types by reference. The HP compiler doesn't
5868 support these types, so hopefully there shouldn't be any compatibility
5869 issues. This may have to be revisited when HP releases a C99 compiler
5870 or updates the ABI. */
5873 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5874 enum machine_mode mode, const_tree type,
5875 bool named ATTRIBUTE_UNUSED)
5880 size = int_size_in_bytes (type);
5882 size = GET_MODE_SIZE (mode);
5887 return size <= 0 || size > 8;
5891 function_arg_padding (enum machine_mode mode, const_tree type)
5896 && (AGGREGATE_TYPE_P (type)
5897 || TREE_CODE (type) == COMPLEX_TYPE
5898 || TREE_CODE (type) == VECTOR_TYPE)))
5900 /* Return none if justification is not required. */
5902 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5903 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5906 /* The directions set here are ignored when a BLKmode argument larger
5907 than a word is placed in a register. Different code is used for
5908 the stack and registers. This makes it difficult to have a
5909 consistent data representation for both the stack and registers.
5910 For both runtimes, the justification and padding for arguments on
5911 the stack and in registers should be identical. */
5913 /* The 64-bit runtime specifies left justification for aggregates. */
5916 /* The 32-bit runtime architecture specifies right justification.
5917 When the argument is passed on the stack, the argument is padded
5918 with garbage on the left. The HP compiler pads with zeros. */
5922 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5929 /* Do what is necessary for `va_start'. We look at the current function
5930 to determine if stdargs or varargs is used and fill in an initial
5931 va_list. A pointer to this constructor is returned. */
5934 hppa_builtin_saveregs (void)
5937 tree fntype = TREE_TYPE (current_function_decl);
5938 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5939 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5940 != void_type_node)))
5941 ? UNITS_PER_WORD : 0);
5944 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5946 offset = crtl->args.arg_offset_rtx;
5952 /* Adjust for varargs/stdarg differences. */
5954 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5956 offset = crtl->args.arg_offset_rtx;
5958 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5959 from the incoming arg pointer and growing to larger addresses. */
5960 for (i = 26, off = -64; i >= 19; i--, off += 8)
5961 emit_move_insn (gen_rtx_MEM (word_mode,
5962 plus_constant (arg_pointer_rtx, off)),
5963 gen_rtx_REG (word_mode, i));
5965 /* The incoming args pointer points just beyond the flushback area;
5966 normally this is not a serious concern. However, when we are doing
5967 varargs/stdargs we want to make the arg pointer point to the start
5968 of the incoming argument area. */
5969 emit_move_insn (virtual_incoming_args_rtx,
5970 plus_constant (arg_pointer_rtx, -64));
5972 /* Now return a pointer to the first anonymous argument. */
5973 return copy_to_reg (expand_binop (Pmode, add_optab,
5974 virtual_incoming_args_rtx,
5975 offset, 0, 0, OPTAB_LIB_WIDEN));
5978 /* Store general registers on the stack. */
5979 dest = gen_rtx_MEM (BLKmode,
5980 plus_constant (crtl->args.internal_arg_pointer,
5982 set_mem_alias_set (dest, get_varargs_alias_set ());
5983 set_mem_align (dest, BITS_PER_WORD);
5984 move_block_from_reg (23, dest, 4);
5986 /* move_block_from_reg will emit code to store the argument registers
5987 individually as scalar stores.
5989 However, other insns may later load from the same addresses for
5990 a structure load (passing a struct to a varargs routine).
5992 The alias code assumes that such aliasing can never happen, so we
5993 have to keep memory referencing insns from moving up beyond the
5994 last argument register store. So we emit a blockage insn here. */
5995 emit_insn (gen_blockage ());
5997 return copy_to_reg (expand_binop (Pmode, add_optab,
5998 crtl->args.internal_arg_pointer,
5999 offset, 0, 0, OPTAB_LIB_WIDEN));
6003 hppa_va_start (tree valist, rtx nextarg)
6005 nextarg = expand_builtin_saveregs ();
6006 std_expand_builtin_va_start (valist, nextarg);
6010 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6015 /* Args grow upward. We can use the generic routines. */
6016 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6018 else /* !TARGET_64BIT */
6020 tree ptr = build_pointer_type (type);
6023 unsigned int size, ofs;
6026 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6030 ptr = build_pointer_type (type);
6032 size = int_size_in_bytes (type);
6033 valist_type = TREE_TYPE (valist);
6035 /* Args grow down. Not handled by generic routines. */
6037 u = fold_convert (sizetype, size_in_bytes (type));
6038 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6039 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6041 /* Copied from va-pa.h, but we probably don't need to align to
6042 word size, since we generate and preserve that invariant. */
6043 u = size_int (size > 4 ? -8 : -4);
6044 t = fold_convert (sizetype, t);
6045 t = build2 (BIT_AND_EXPR, sizetype, t, u);
6046 t = fold_convert (valist_type, t);
6048 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6050 ofs = (8 - size) % 4;
6054 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6057 t = fold_convert (ptr, t);
6058 t = build_va_arg_indirect_ref (t);
6061 t = build_va_arg_indirect_ref (t);
6067 /* True if MODE is valid for the target. By "valid", we mean able to
6068 be manipulated in non-trivial ways. In particular, this means all
6069 the arithmetic is supported.
6071 Currently, TImode is not valid as the HP 64-bit runtime documentation
6072 doesn't document the alignment and calling conventions for this type.
6073 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6074 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6077 pa_scalar_mode_supported_p (enum machine_mode mode)
6079 int precision = GET_MODE_PRECISION (mode);
6081 switch (GET_MODE_CLASS (mode))
6083 case MODE_PARTIAL_INT:
6085 if (precision == CHAR_TYPE_SIZE)
6087 if (precision == SHORT_TYPE_SIZE)
6089 if (precision == INT_TYPE_SIZE)
6091 if (precision == LONG_TYPE_SIZE)
6093 if (precision == LONG_LONG_TYPE_SIZE)
6098 if (precision == FLOAT_TYPE_SIZE)
6100 if (precision == DOUBLE_TYPE_SIZE)
6102 if (precision == LONG_DOUBLE_TYPE_SIZE)
6106 case MODE_DECIMAL_FLOAT:
6114 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6115 it branches to the next real instruction. Otherwise, return FALSE. */
6118 branch_to_delay_slot_p (rtx insn)
6120 if (dbr_sequence_length ())
6123 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6126 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6128 This occurs when INSN has an unfilled delay slot and is followed
6129 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6130 the jump branches into the delay slot. So, we add a nop in the delay
6131 slot just to be safe. This messes up our instruction count, but we
6132 don't know how big the ASM_INPUT insn is anyway. */
6135 branch_needs_nop_p (rtx insn)
6139 if (dbr_sequence_length ())
6142 next_insn = next_real_insn (insn);
6143 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6146 /* This routine handles all the normal conditional branch sequences we
6147 might need to generate. It handles compare immediate vs compare
6148 register, nullification of delay slots, varying length branches,
6149 negated branches, and all combinations of the above. It returns the
6150 output appropriate to emit the branch corresponding to all given
6154 output_cbranch (rtx *operands, int negated, rtx insn)
6156 static char buf[100];
6158 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6159 int length = get_attr_length (insn);
6162 /* A conditional branch to the following instruction (e.g. the delay slot)
6163 is asking for a disaster. This can happen when not optimizing and
6164 when jump optimization fails.
6166 While it is usually safe to emit nothing, this can fail if the
6167 preceding instruction is a nullified branch with an empty delay
6168 slot and the same branch target as this branch. We could check
6169 for this but jump optimization should eliminate nop jumps. It
6170 is always safe to emit a nop. */
6171 if (branch_to_delay_slot_p (insn))
6174 /* The doubleword form of the cmpib instruction doesn't have the LEU
6175 and GTU conditions while the cmpb instruction does. Since we accept
6176 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6177 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6178 operands[2] = gen_rtx_REG (DImode, 0);
6179 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6180 operands[1] = gen_rtx_REG (DImode, 0);
6182 /* If this is a long branch with its delay slot unfilled, set `nullify'
6183 as it can nullify the delay slot and save a nop. */
6184 if (length == 8 && dbr_sequence_length () == 0)
6187 /* If this is a short forward conditional branch which did not get
6188 its delay slot filled, the delay slot can still be nullified. */
6189 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6190 nullify = forward_branch_p (insn);
6192 /* A forward branch over a single nullified insn can be done with a
6193 comclr instruction. This avoids a single cycle penalty due to
6194 mis-predicted branch if we fall through (branch not taken). */
6196 && next_real_insn (insn) != 0
6197 && get_attr_length (next_real_insn (insn)) == 4
6198 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6204 /* All short conditional branches except backwards with an unfilled
6208 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6210 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6211 if (GET_MODE (operands[1]) == DImode)
6214 strcat (buf, "%B3");
6216 strcat (buf, "%S3");
6218 strcat (buf, " %2,%r1,%%r0");
6221 if (branch_needs_nop_p (insn))
6222 strcat (buf, ",n %2,%r1,%0%#");
6224 strcat (buf, ",n %2,%r1,%0");
6227 strcat (buf, " %2,%r1,%0");
6230 /* All long conditionals. Note a short backward branch with an
6231 unfilled delay slot is treated just like a long backward branch
6232 with an unfilled delay slot. */
6234 /* Handle weird backwards branch with a filled delay slot
6235 which is nullified. */
6236 if (dbr_sequence_length () != 0
6237 && ! forward_branch_p (insn)
6240 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6241 if (GET_MODE (operands[1]) == DImode)
6244 strcat (buf, "%S3");
6246 strcat (buf, "%B3");
6247 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6249 /* Handle short backwards branch with an unfilled delay slot.
6250 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6251 taken and untaken branches. */
6252 else if (dbr_sequence_length () == 0
6253 && ! forward_branch_p (insn)
6254 && INSN_ADDRESSES_SET_P ()
6255 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6256 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6258 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6259 if (GET_MODE (operands[1]) == DImode)
6262 strcat (buf, "%B3 %2,%r1,%0%#");
6264 strcat (buf, "%S3 %2,%r1,%0%#");
6268 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6269 if (GET_MODE (operands[1]) == DImode)
6272 strcat (buf, "%S3");
6274 strcat (buf, "%B3");
6276 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6278 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6283 /* The reversed conditional branch must branch over one additional
6284 instruction if the delay slot is filled and needs to be extracted
6285 by output_lbranch. If the delay slot is empty or this is a
6286 nullified forward branch, the instruction after the reversed
6287 condition branch must be nullified. */
6288 if (dbr_sequence_length () == 0
6289 || (nullify && forward_branch_p (insn)))
6293 operands[4] = GEN_INT (length);
6298 operands[4] = GEN_INT (length + 4);
6301 /* Create a reversed conditional branch which branches around
6302 the following insns. */
6303 if (GET_MODE (operands[1]) != DImode)
6309 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6312 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6318 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6321 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6330 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6333 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6339 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6342 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6346 output_asm_insn (buf, operands);
6347 return output_lbranch (operands[0], insn, xdelay);
6352 /* This routine handles output of long unconditional branches that
6353 exceed the maximum range of a simple branch instruction. Since
6354 we don't have a register available for the branch, we save register
6355 %r1 in the frame marker, load the branch destination DEST into %r1,
6356 execute the branch, and restore %r1 in the delay slot of the branch.
6358 Since long branches may have an insn in the delay slot and the
6359 delay slot is used to restore %r1, we in general need to extract
6360 this insn and execute it before the branch. However, to facilitate
6361 use of this function by conditional branches, we also provide an
6362 option to not extract the delay insn so that it will be emitted
6363 after the long branch. So, if there is an insn in the delay slot,
6364 it is extracted if XDELAY is nonzero.
6366 The lengths of the various long-branch sequences are 20, 16 and 24
6367 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6370 output_lbranch (rtx dest, rtx insn, int xdelay)
6374 xoperands[0] = dest;
6376 /* First, free up the delay slot. */
6377 if (xdelay && dbr_sequence_length () != 0)
6379 /* We can't handle a jump in the delay slot. */
6380 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6382 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6385 /* Now delete the delay insn. */
6386 SET_INSN_DELETED (NEXT_INSN (insn));
6389 /* Output an insn to save %r1. The runtime documentation doesn't
6390 specify whether the "Clean Up" slot in the callers frame can
6391 be clobbered by the callee. It isn't copied by HP's builtin
6392 alloca, so this suggests that it can be clobbered if necessary.
6393 The "Static Link" location is copied by HP builtin alloca, so
6394 we avoid using it. Using the cleanup slot might be a problem
6395 if we have to interoperate with languages that pass cleanup
6396 information. However, it should be possible to handle these
6397 situations with GCC's asm feature.
6399 The "Current RP" slot is reserved for the called procedure, so
6400 we try to use it when we don't have a frame of our own. It's
6401 rather unlikely that we won't have a frame when we need to emit
6404 Really the way to go long term is a register scavenger; goto
6405 the target of the jump and find a register which we can use
6406 as a scratch to hold the value in %r1. Then, we wouldn't have
6407 to free up the delay slot or clobber a slot that may be needed
6408 for other purposes. */
6411 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6412 /* Use the return pointer slot in the frame marker. */
6413 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6415 /* Use the slot at -40 in the frame marker since HP builtin
6416 alloca doesn't copy it. */
6417 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6421 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6422 /* Use the return pointer slot in the frame marker. */
6423 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6425 /* Use the "Clean Up" slot in the frame marker. In GCC,
6426 the only other use of this location is for copying a
6427 floating point double argument from a floating-point
6428 register to two general registers. The copy is done
6429 as an "atomic" operation when outputting a call, so it
6430 won't interfere with our using the location here. */
6431 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6434 if (TARGET_PORTABLE_RUNTIME)
6436 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6437 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6438 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6442 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6443 if (TARGET_SOM || !TARGET_GAS)
6445 xoperands[1] = gen_label_rtx ();
6446 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6447 targetm.asm_out.internal_label (asm_out_file, "L",
6448 CODE_LABEL_NUMBER (xoperands[1]));
6449 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6453 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6454 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6456 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6459 /* Now output a very long branch to the original target. */
6460 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6462 /* Now restore the value of %r1 in the delay slot. */
6465 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6466 return "ldd -16(%%r30),%%r1";
6468 return "ldd -40(%%r30),%%r1";
6472 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6473 return "ldw -20(%%r30),%%r1";
6475 return "ldw -12(%%r30),%%r1";
6479 /* This routine handles all the branch-on-bit conditional branch sequences we
6480 might need to generate. It handles nullification of delay slots,
6481 varying length branches, negated branches and all combinations of the
6482 above. it returns the appropriate output template to emit the branch. */
6485 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6487 static char buf[100];
6489 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6490 int length = get_attr_length (insn);
6493 /* A conditional branch to the following instruction (e.g. the delay slot) is
6494 asking for a disaster. I do not think this can happen as this pattern
6495 is only used when optimizing; jump optimization should eliminate the
6496 jump. But be prepared just in case. */
6498 if (branch_to_delay_slot_p (insn))
6501 /* If this is a long branch with its delay slot unfilled, set `nullify'
6502 as it can nullify the delay slot and save a nop. */
6503 if (length == 8 && dbr_sequence_length () == 0)
6506 /* If this is a short forward conditional branch which did not get
6507 its delay slot filled, the delay slot can still be nullified. */
6508 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6509 nullify = forward_branch_p (insn);
6511 /* A forward branch over a single nullified insn can be done with a
6512 extrs instruction. This avoids a single cycle penalty due to
6513 mis-predicted branch if we fall through (branch not taken). */
6516 && next_real_insn (insn) != 0
6517 && get_attr_length (next_real_insn (insn)) == 4
6518 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6525 /* All short conditional branches except backwards with an unfilled
6529 strcpy (buf, "{extrs,|extrw,s,}");
6531 strcpy (buf, "bb,");
6532 if (useskip && GET_MODE (operands[0]) == DImode)
6533 strcpy (buf, "extrd,s,*");
6534 else if (GET_MODE (operands[0]) == DImode)
6535 strcpy (buf, "bb,*");
6536 if ((which == 0 && negated)
6537 || (which == 1 && ! negated))
6542 strcat (buf, " %0,%1,1,%%r0");
6543 else if (nullify && negated)
6545 if (branch_needs_nop_p (insn))
6546 strcat (buf, ",n %0,%1,%3%#");
6548 strcat (buf, ",n %0,%1,%3");
6550 else if (nullify && ! negated)
6552 if (branch_needs_nop_p (insn))
6553 strcat (buf, ",n %0,%1,%2%#");
6555 strcat (buf, ",n %0,%1,%2");
6557 else if (! nullify && negated)
6558 strcat (buf, " %0,%1,%3");
6559 else if (! nullify && ! negated)
6560 strcat (buf, " %0,%1,%2");
6563 /* All long conditionals. Note a short backward branch with an
6564 unfilled delay slot is treated just like a long backward branch
6565 with an unfilled delay slot. */
6567 /* Handle weird backwards branch with a filled delay slot
6568 which is nullified. */
6569 if (dbr_sequence_length () != 0
6570 && ! forward_branch_p (insn)
6573 strcpy (buf, "bb,");
6574 if (GET_MODE (operands[0]) == DImode)
6576 if ((which == 0 && negated)
6577 || (which == 1 && ! negated))
6582 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6584 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6586 /* Handle short backwards branch with an unfilled delay slot.
6587 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6588 taken and untaken branches. */
6589 else if (dbr_sequence_length () == 0
6590 && ! forward_branch_p (insn)
6591 && INSN_ADDRESSES_SET_P ()
6592 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6593 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6595 strcpy (buf, "bb,");
6596 if (GET_MODE (operands[0]) == DImode)
6598 if ((which == 0 && negated)
6599 || (which == 1 && ! negated))
6604 strcat (buf, " %0,%1,%3%#");
6606 strcat (buf, " %0,%1,%2%#");
6610 if (GET_MODE (operands[0]) == DImode)
6611 strcpy (buf, "extrd,s,*");
6613 strcpy (buf, "{extrs,|extrw,s,}");
6614 if ((which == 0 && negated)
6615 || (which == 1 && ! negated))
6619 if (nullify && negated)
6620 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6621 else if (nullify && ! negated)
6622 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6624 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6626 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6631 /* The reversed conditional branch must branch over one additional
6632 instruction if the delay slot is filled and needs to be extracted
6633 by output_lbranch. If the delay slot is empty or this is a
6634 nullified forward branch, the instruction after the reversed
6635 condition branch must be nullified. */
6636 if (dbr_sequence_length () == 0
6637 || (nullify && forward_branch_p (insn)))
6641 operands[4] = GEN_INT (length);
6646 operands[4] = GEN_INT (length + 4);
6649 if (GET_MODE (operands[0]) == DImode)
6650 strcpy (buf, "bb,*");
6652 strcpy (buf, "bb,");
6653 if ((which == 0 && negated)
6654 || (which == 1 && !negated))
6659 strcat (buf, ",n %0,%1,.+%4");
6661 strcat (buf, " %0,%1,.+%4");
6662 output_asm_insn (buf, operands);
6663 return output_lbranch (negated ? operands[3] : operands[2],
6669 /* This routine handles all the branch-on-variable-bit conditional branch
6670 sequences we might need to generate. It handles nullification of delay
6671 slots, varying length branches, negated branches and all combinations
6672 of the above. it returns the appropriate output template to emit the
6676 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6678 static char buf[100];
6680 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6681 int length = get_attr_length (insn);
6684 /* A conditional branch to the following instruction (e.g. the delay slot) is
6685 asking for a disaster. I do not think this can happen as this pattern
6686 is only used when optimizing; jump optimization should eliminate the
6687 jump. But be prepared just in case. */
6689 if (branch_to_delay_slot_p (insn))
6692 /* If this is a long branch with its delay slot unfilled, set `nullify'
6693 as it can nullify the delay slot and save a nop. */
6694 if (length == 8 && dbr_sequence_length () == 0)
6697 /* If this is a short forward conditional branch which did not get
6698 its delay slot filled, the delay slot can still be nullified. */
6699 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6700 nullify = forward_branch_p (insn);
6702 /* A forward branch over a single nullified insn can be done with a
6703 extrs instruction. This avoids a single cycle penalty due to
6704 mis-predicted branch if we fall through (branch not taken). */
6707 && next_real_insn (insn) != 0
6708 && get_attr_length (next_real_insn (insn)) == 4
6709 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6716 /* All short conditional branches except backwards with an unfilled
6720 strcpy (buf, "{vextrs,|extrw,s,}");
6722 strcpy (buf, "{bvb,|bb,}");
6723 if (useskip && GET_MODE (operands[0]) == DImode)
6724 strcpy (buf, "extrd,s,*");
6725 else if (GET_MODE (operands[0]) == DImode)
6726 strcpy (buf, "bb,*");
6727 if ((which == 0 && negated)
6728 || (which == 1 && ! negated))
6733 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6734 else if (nullify && negated)
6736 if (branch_needs_nop_p (insn))
6737 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6739 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6741 else if (nullify && ! negated)
6743 if (branch_needs_nop_p (insn))
6744 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6746 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6748 else if (! nullify && negated)
6749 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6750 else if (! nullify && ! negated)
6751 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6754 /* All long conditionals. Note a short backward branch with an
6755 unfilled delay slot is treated just like a long backward branch
6756 with an unfilled delay slot. */
6758 /* Handle weird backwards branch with a filled delay slot
6759 which is nullified. */
6760 if (dbr_sequence_length () != 0
6761 && ! forward_branch_p (insn)
6764 strcpy (buf, "{bvb,|bb,}");
6765 if (GET_MODE (operands[0]) == DImode)
6767 if ((which == 0 && negated)
6768 || (which == 1 && ! negated))
6773 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6775 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6777 /* Handle short backwards branch with an unfilled delay slot.
6778 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6779 taken and untaken branches. */
6780 else if (dbr_sequence_length () == 0
6781 && ! forward_branch_p (insn)
6782 && INSN_ADDRESSES_SET_P ()
6783 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6784 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6786 strcpy (buf, "{bvb,|bb,}");
6787 if (GET_MODE (operands[0]) == DImode)
6789 if ((which == 0 && negated)
6790 || (which == 1 && ! negated))
6795 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6797 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6801 strcpy (buf, "{vextrs,|extrw,s,}");
6802 if (GET_MODE (operands[0]) == DImode)
6803 strcpy (buf, "extrd,s,*");
6804 if ((which == 0 && negated)
6805 || (which == 1 && ! negated))
6809 if (nullify && negated)
6810 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6811 else if (nullify && ! negated)
6812 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6814 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6816 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6821 /* The reversed conditional branch must branch over one additional
6822 instruction if the delay slot is filled and needs to be extracted
6823 by output_lbranch. If the delay slot is empty or this is a
6824 nullified forward branch, the instruction after the reversed
6825 condition branch must be nullified. */
6826 if (dbr_sequence_length () == 0
6827 || (nullify && forward_branch_p (insn)))
6831 operands[4] = GEN_INT (length);
6836 operands[4] = GEN_INT (length + 4);
6839 if (GET_MODE (operands[0]) == DImode)
6840 strcpy (buf, "bb,*");
6842 strcpy (buf, "{bvb,|bb,}");
6843 if ((which == 0 && negated)
6844 || (which == 1 && !negated))
6849 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6851 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6852 output_asm_insn (buf, operands);
6853 return output_lbranch (negated ? operands[3] : operands[2],
6859 /* Return the output template for emitting a dbra type insn.
6861 Note it may perform some output operations on its own before
6862 returning the final output string. */
6864 output_dbra (rtx *operands, rtx insn, int which_alternative)
6866 int length = get_attr_length (insn);
6868 /* A conditional branch to the following instruction (e.g. the delay slot) is
6869 asking for a disaster. Be prepared! */
6871 if (branch_to_delay_slot_p (insn))
6873 if (which_alternative == 0)
6874 return "ldo %1(%0),%0";
6875 else if (which_alternative == 1)
6877 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6878 output_asm_insn ("ldw -16(%%r30),%4", operands);
6879 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6880 return "{fldws|fldw} -16(%%r30),%0";
6884 output_asm_insn ("ldw %0,%4", operands);
6885 return "ldo %1(%4),%4\n\tstw %4,%0";
6889 if (which_alternative == 0)
6891 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6894 /* If this is a long branch with its delay slot unfilled, set `nullify'
6895 as it can nullify the delay slot and save a nop. */
6896 if (length == 8 && dbr_sequence_length () == 0)
6899 /* If this is a short forward conditional branch which did not get
6900 its delay slot filled, the delay slot can still be nullified. */
6901 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6902 nullify = forward_branch_p (insn);
6909 if (branch_needs_nop_p (insn))
6910 return "addib,%C2,n %1,%0,%3%#";
6912 return "addib,%C2,n %1,%0,%3";
6915 return "addib,%C2 %1,%0,%3";
6918 /* Handle weird backwards branch with a fulled delay slot
6919 which is nullified. */
6920 if (dbr_sequence_length () != 0
6921 && ! forward_branch_p (insn)
6923 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6924 /* Handle short backwards branch with an unfilled delay slot.
6925 Using a addb;nop rather than addi;bl saves 1 cycle for both
6926 taken and untaken branches. */
6927 else if (dbr_sequence_length () == 0
6928 && ! forward_branch_p (insn)
6929 && INSN_ADDRESSES_SET_P ()
6930 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6931 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6932 return "addib,%C2 %1,%0,%3%#";
6934 /* Handle normal cases. */
6936 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6938 return "addi,%N2 %1,%0,%0\n\tb %3";
6941 /* The reversed conditional branch must branch over one additional
6942 instruction if the delay slot is filled and needs to be extracted
6943 by output_lbranch. If the delay slot is empty or this is a
6944 nullified forward branch, the instruction after the reversed
6945 condition branch must be nullified. */
6946 if (dbr_sequence_length () == 0
6947 || (nullify && forward_branch_p (insn)))
6951 operands[4] = GEN_INT (length);
6956 operands[4] = GEN_INT (length + 4);
6960 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6962 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6964 return output_lbranch (operands[3], insn, xdelay);
6968 /* Deal with gross reload from FP register case. */
6969 else if (which_alternative == 1)
6971 /* Move loop counter from FP register to MEM then into a GR,
6972 increment the GR, store the GR into MEM, and finally reload
6973 the FP register from MEM from within the branch's delay slot. */
6974 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6976 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6978 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6979 else if (length == 28)
6980 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6983 operands[5] = GEN_INT (length - 16);
6984 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6985 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6986 return output_lbranch (operands[3], insn, 0);
6989 /* Deal with gross reload from memory case. */
6992 /* Reload loop counter from memory, the store back to memory
6993 happens in the branch's delay slot. */
6994 output_asm_insn ("ldw %0,%4", operands);
6996 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6997 else if (length == 16)
6998 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7001 operands[5] = GEN_INT (length - 4);
7002 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7003 return output_lbranch (operands[3], insn, 0);
7008 /* Return the output template for emitting a movb type insn.
7010 Note it may perform some output operations on its own before
7011 returning the final output string. */
7013 output_movb (rtx *operands, rtx insn, int which_alternative,
7014 int reverse_comparison)
7016 int length = get_attr_length (insn);
7018 /* A conditional branch to the following instruction (e.g. the delay slot) is
7019 asking for a disaster. Be prepared! */
7021 if (branch_to_delay_slot_p (insn))
7023 if (which_alternative == 0)
7024 return "copy %1,%0";
7025 else if (which_alternative == 1)
7027 output_asm_insn ("stw %1,-16(%%r30)", operands);
7028 return "{fldws|fldw} -16(%%r30),%0";
7030 else if (which_alternative == 2)
7036 /* Support the second variant. */
7037 if (reverse_comparison)
7038 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7040 if (which_alternative == 0)
7042 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7045 /* If this is a long branch with its delay slot unfilled, set `nullify'
7046 as it can nullify the delay slot and save a nop. */
7047 if (length == 8 && dbr_sequence_length () == 0)
7050 /* If this is a short forward conditional branch which did not get
7051 its delay slot filled, the delay slot can still be nullified. */
7052 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7053 nullify = forward_branch_p (insn);
7060 if (branch_needs_nop_p (insn))
7061 return "movb,%C2,n %1,%0,%3%#";
7063 return "movb,%C2,n %1,%0,%3";
7066 return "movb,%C2 %1,%0,%3";
7069 /* Handle weird backwards branch with a filled delay slot
7070 which is nullified. */
7071 if (dbr_sequence_length () != 0
7072 && ! forward_branch_p (insn)
7074 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7076 /* Handle short backwards branch with an unfilled delay slot.
7077 Using a movb;nop rather than or;bl saves 1 cycle for both
7078 taken and untaken branches. */
7079 else if (dbr_sequence_length () == 0
7080 && ! forward_branch_p (insn)
7081 && INSN_ADDRESSES_SET_P ()
7082 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7083 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7084 return "movb,%C2 %1,%0,%3%#";
7085 /* Handle normal cases. */
7087 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7089 return "or,%N2 %1,%%r0,%0\n\tb %3";
7092 /* The reversed conditional branch must branch over one additional
7093 instruction if the delay slot is filled and needs to be extracted
7094 by output_lbranch. If the delay slot is empty or this is a
7095 nullified forward branch, the instruction after the reversed
7096 condition branch must be nullified. */
7097 if (dbr_sequence_length () == 0
7098 || (nullify && forward_branch_p (insn)))
7102 operands[4] = GEN_INT (length);
7107 operands[4] = GEN_INT (length + 4);
7111 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7113 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7115 return output_lbranch (operands[3], insn, xdelay);
7118 /* Deal with gross reload for FP destination register case. */
7119 else if (which_alternative == 1)
7121 /* Move source register to MEM, perform the branch test, then
7122 finally load the FP register from MEM from within the branch's
7124 output_asm_insn ("stw %1,-16(%%r30)", operands);
7126 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7127 else if (length == 16)
7128 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7131 operands[4] = GEN_INT (length - 4);
7132 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7133 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7134 return output_lbranch (operands[3], insn, 0);
7137 /* Deal with gross reload from memory case. */
7138 else if (which_alternative == 2)
7140 /* Reload loop counter from memory, the store back to memory
7141 happens in the branch's delay slot. */
7143 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7144 else if (length == 12)
7145 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7148 operands[4] = GEN_INT (length);
7149 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7151 return output_lbranch (operands[3], insn, 0);
7154 /* Handle SAR as a destination. */
7158 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7159 else if (length == 12)
7160 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7163 operands[4] = GEN_INT (length);
7164 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7166 return output_lbranch (operands[3], insn, 0);
7171 /* Copy any FP arguments in INSN into integer registers. */
7173 copy_fp_args (rtx insn)
7178 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7180 int arg_mode, regno;
7181 rtx use = XEXP (link, 0);
7183 if (! (GET_CODE (use) == USE
7184 && GET_CODE (XEXP (use, 0)) == REG
7185 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7188 arg_mode = GET_MODE (XEXP (use, 0));
7189 regno = REGNO (XEXP (use, 0));
7191 /* Is it a floating point register? */
7192 if (regno >= 32 && regno <= 39)
7194 /* Copy the FP register into an integer register via memory. */
7195 if (arg_mode == SFmode)
7197 xoperands[0] = XEXP (use, 0);
7198 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7199 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7200 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7204 xoperands[0] = XEXP (use, 0);
7205 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7206 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7207 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7208 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7214 /* Compute length of the FP argument copy sequence for INSN. */
7216 length_fp_args (rtx insn)
7221 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7223 int arg_mode, regno;
7224 rtx use = XEXP (link, 0);
7226 if (! (GET_CODE (use) == USE
7227 && GET_CODE (XEXP (use, 0)) == REG
7228 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7231 arg_mode = GET_MODE (XEXP (use, 0));
7232 regno = REGNO (XEXP (use, 0));
7234 /* Is it a floating point register? */
7235 if (regno >= 32 && regno <= 39)
7237 if (arg_mode == SFmode)
7247 /* Return the attribute length for the millicode call instruction INSN.
7248 The length must match the code generated by output_millicode_call.
7249 We include the delay slot in the returned length as it is better to
7250 over estimate the length than to under estimate it. */
7253 attr_length_millicode_call (rtx insn)
7255 unsigned long distance = -1;
7256 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7258 if (INSN_ADDRESSES_SET_P ())
7260 distance = (total + insn_current_reference_address (insn));
7261 if (distance < total)
7267 if (!TARGET_LONG_CALLS && distance < 7600000)
7272 else if (TARGET_PORTABLE_RUNTIME)
7276 if (!TARGET_LONG_CALLS && distance < 240000)
7279 if (TARGET_LONG_ABS_CALL && !flag_pic)
7286 /* INSN is a function call. It may have an unconditional jump
7289 CALL_DEST is the routine we are calling. */
7292 output_millicode_call (rtx insn, rtx call_dest)
7294 int attr_length = get_attr_length (insn);
7295 int seq_length = dbr_sequence_length ();
7300 xoperands[0] = call_dest;
7301 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7303 /* Handle the common case where we are sure that the branch will
7304 reach the beginning of the $CODE$ subspace. The within reach
7305 form of the $$sh_func_adrs call has a length of 28. Because
7306 it has an attribute type of multi, it never has a nonzero
7307 sequence length. The length of the $$sh_func_adrs is the same
7308 as certain out of reach PIC calls to other routines. */
7309 if (!TARGET_LONG_CALLS
7310 && ((seq_length == 0
7311 && (attr_length == 12
7312 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7313 || (seq_length != 0 && attr_length == 8)))
7315 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7321 /* It might seem that one insn could be saved by accessing
7322 the millicode function using the linkage table. However,
7323 this doesn't work in shared libraries and other dynamically
7324 loaded objects. Using a pc-relative sequence also avoids
7325 problems related to the implicit use of the gp register. */
7326 output_asm_insn ("b,l .+8,%%r1", xoperands);
7330 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7331 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7335 xoperands[1] = gen_label_rtx ();
7336 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7337 targetm.asm_out.internal_label (asm_out_file, "L",
7338 CODE_LABEL_NUMBER (xoperands[1]));
7339 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7342 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7344 else if (TARGET_PORTABLE_RUNTIME)
7346 /* Pure portable runtime doesn't allow be/ble; we also don't
7347 have PIC support in the assembler/linker, so this sequence
7350 /* Get the address of our target into %r1. */
7351 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7352 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7354 /* Get our return address into %r31. */
7355 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7356 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7358 /* Jump to our target address in %r1. */
7359 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7363 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7365 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7367 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7371 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7372 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7374 if (TARGET_SOM || !TARGET_GAS)
7376 /* The HP assembler can generate relocations for the
7377 difference of two symbols. GAS can do this for a
7378 millicode symbol but not an arbitrary external
7379 symbol when generating SOM output. */
7380 xoperands[1] = gen_label_rtx ();
7381 targetm.asm_out.internal_label (asm_out_file, "L",
7382 CODE_LABEL_NUMBER (xoperands[1]));
7383 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7384 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7388 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7389 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7393 /* Jump to our target address in %r1. */
7394 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7398 if (seq_length == 0)
7399 output_asm_insn ("nop", xoperands);
7401 /* We are done if there isn't a jump in the delay slot. */
7402 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7405 /* This call has an unconditional jump in its delay slot. */
7406 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7408 /* See if the return address can be adjusted. Use the containing
7409 sequence insn's address. */
7410 if (INSN_ADDRESSES_SET_P ())
7412 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7413 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7414 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7416 if (VAL_14_BITS_P (distance))
7418 xoperands[1] = gen_label_rtx ();
7419 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7420 targetm.asm_out.internal_label (asm_out_file, "L",
7421 CODE_LABEL_NUMBER (xoperands[1]));
7424 /* ??? This branch may not reach its target. */
7425 output_asm_insn ("nop\n\tb,n %0", xoperands);
7428 /* ??? This branch may not reach its target. */
7429 output_asm_insn ("nop\n\tb,n %0", xoperands);
7431 /* Delete the jump. */
7432 SET_INSN_DELETED (NEXT_INSN (insn));
7437 /* Return the attribute length of the call instruction INSN. The SIBCALL
7438 flag indicates whether INSN is a regular call or a sibling call. The
7439 length returned must be longer than the code actually generated by
7440 output_call. Since branch shortening is done before delay branch
7441 sequencing, there is no way to determine whether or not the delay
7442 slot will be filled during branch shortening. Even when the delay
7443 slot is filled, we may have to add a nop if the delay slot contains
7444 a branch that can't reach its target. Thus, we always have to include
7445 the delay slot in the length estimate. This used to be done in
7446 pa_adjust_insn_length but we do it here now as some sequences always
7447 fill the delay slot and we can save four bytes in the estimate for
7451 attr_length_call (rtx insn, int sibcall)
7454 rtx call, call_dest;
7457 rtx pat = PATTERN (insn);
7458 unsigned long distance = -1;
7460 gcc_assert (GET_CODE (insn) == CALL_INSN);
7462 if (INSN_ADDRESSES_SET_P ())
7464 unsigned long total;
7466 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7467 distance = (total + insn_current_reference_address (insn));
7468 if (distance < total)
7472 gcc_assert (GET_CODE (pat) == PARALLEL);
7474 /* Get the call rtx. */
7475 call = XVECEXP (pat, 0, 0);
7476 if (GET_CODE (call) == SET)
7477 call = SET_SRC (call);
7479 gcc_assert (GET_CODE (call) == CALL);
7481 /* Determine if this is a local call. */
7482 call_dest = XEXP (XEXP (call, 0), 0);
7483 call_decl = SYMBOL_REF_DECL (call_dest);
7484 local_call = call_decl && targetm.binds_local_p (call_decl);
7486 /* pc-relative branch. */
7487 if (!TARGET_LONG_CALLS
7488 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7489 || distance < 240000))
7492 /* 64-bit plabel sequence. */
7493 else if (TARGET_64BIT && !local_call)
7494 length += sibcall ? 28 : 24;
7496 /* non-pic long absolute branch sequence. */
7497 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7500 /* long pc-relative branch sequence. */
7501 else if (TARGET_LONG_PIC_SDIFF_CALL
7502 || (TARGET_GAS && !TARGET_SOM
7503 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7507 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && flag_pic)
7511 /* 32-bit plabel sequence. */
7517 length += length_fp_args (insn);
7527 if (!TARGET_NO_SPACE_REGS && flag_pic)
7535 /* INSN is a function call. It may have an unconditional jump
7538 CALL_DEST is the routine we are calling. */
7541 output_call (rtx insn, rtx call_dest, int sibcall)
7543 int delay_insn_deleted = 0;
7544 int delay_slot_filled = 0;
7545 int seq_length = dbr_sequence_length ();
7546 tree call_decl = SYMBOL_REF_DECL (call_dest);
7547 int local_call = call_decl && targetm.binds_local_p (call_decl);
7550 xoperands[0] = call_dest;
7552 /* Handle the common case where we're sure that the branch will reach
7553 the beginning of the "$CODE$" subspace. This is the beginning of
7554 the current function if we are in a named section. */
7555 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7557 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7558 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7562 if (TARGET_64BIT && !local_call)
7564 /* ??? As far as I can tell, the HP linker doesn't support the
7565 long pc-relative sequence described in the 64-bit runtime
7566 architecture. So, we use a slightly longer indirect call. */
7567 xoperands[0] = get_deferred_plabel (call_dest);
7568 xoperands[1] = gen_label_rtx ();
7570 /* If this isn't a sibcall, we put the load of %r27 into the
7571 delay slot. We can't do this in a sibcall as we don't
7572 have a second call-clobbered scratch register available. */
7574 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7577 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7580 /* Now delete the delay insn. */
7581 SET_INSN_DELETED (NEXT_INSN (insn));
7582 delay_insn_deleted = 1;
7585 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7586 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7587 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7591 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7592 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7593 output_asm_insn ("bve (%%r1)", xoperands);
7597 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7598 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7599 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7600 delay_slot_filled = 1;
7605 int indirect_call = 0;
7607 /* Emit a long call. There are several different sequences
7608 of increasing length and complexity. In most cases,
7609 they don't allow an instruction in the delay slot. */
7610 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7611 && !TARGET_LONG_PIC_SDIFF_CALL
7612 && !(TARGET_GAS && !TARGET_SOM
7613 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7618 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7622 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7624 /* A non-jump insn in the delay slot. By definition we can
7625 emit this insn before the call (and in fact before argument
7627 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7630 /* Now delete the delay insn. */
7631 SET_INSN_DELETED (NEXT_INSN (insn));
7632 delay_insn_deleted = 1;
7635 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7637 /* This is the best sequence for making long calls in
7638 non-pic code. Unfortunately, GNU ld doesn't provide
7639 the stub needed for external calls, and GAS's support
7640 for this with the SOM linker is buggy. It is safe
7641 to use this for local calls. */
7642 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7644 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7648 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7651 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7653 output_asm_insn ("copy %%r31,%%r2", xoperands);
7654 delay_slot_filled = 1;
7659 if (TARGET_LONG_PIC_SDIFF_CALL)
7661 /* The HP assembler and linker can handle relocations
7662 for the difference of two symbols. The HP assembler
7663 recognizes the sequence as a pc-relative call and
7664 the linker provides stubs when needed. */
7665 xoperands[1] = gen_label_rtx ();
7666 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7667 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7668 targetm.asm_out.internal_label (asm_out_file, "L",
7669 CODE_LABEL_NUMBER (xoperands[1]));
7670 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7672 else if (TARGET_GAS && !TARGET_SOM
7673 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7675 /* GAS currently can't generate the relocations that
7676 are needed for the SOM linker under HP-UX using this
7677 sequence. The GNU linker doesn't generate the stubs
7678 that are needed for external calls on TARGET_ELF32
7679 with this sequence. For now, we have to use a
7680 longer plabel sequence when using GAS. */
7681 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7682 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7684 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7689 /* Emit a long plabel-based call sequence. This is
7690 essentially an inline implementation of $$dyncall.
7691 We don't actually try to call $$dyncall as this is
7692 as difficult as calling the function itself. */
7693 xoperands[0] = get_deferred_plabel (call_dest);
7694 xoperands[1] = gen_label_rtx ();
7696 /* Since the call is indirect, FP arguments in registers
7697 need to be copied to the general registers. Then, the
7698 argument relocation stub will copy them back. */
7700 copy_fp_args (insn);
7704 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7705 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7706 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7710 output_asm_insn ("addil LR'%0-$global$,%%r27",
7712 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7716 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7717 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7718 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7719 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7721 if (!sibcall && !TARGET_PA_20)
7723 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7724 if (TARGET_NO_SPACE_REGS)
7725 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7727 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7734 output_asm_insn ("bve (%%r1)", xoperands);
7739 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7740 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7741 delay_slot_filled = 1;
7744 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7749 if (!TARGET_NO_SPACE_REGS && flag_pic)
7750 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7755 if (TARGET_NO_SPACE_REGS || !flag_pic)
7756 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7758 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7762 if (TARGET_NO_SPACE_REGS || !flag_pic)
7763 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7765 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7768 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7770 output_asm_insn ("copy %%r31,%%r2", xoperands);
7771 delay_slot_filled = 1;
7778 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7779 output_asm_insn ("nop", xoperands);
7781 /* We are done if there isn't a jump in the delay slot. */
7783 || delay_insn_deleted
7784 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7787 /* A sibcall should never have a branch in the delay slot. */
7788 gcc_assert (!sibcall);
7790 /* This call has an unconditional jump in its delay slot. */
7791 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7793 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7795 /* See if the return address can be adjusted. Use the containing
7796 sequence insn's address. This would break the regular call/return@
7797 relationship assumed by the table based eh unwinder, so only do that
7798 if the call is not possibly throwing. */
7799 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7800 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7801 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7803 if (VAL_14_BITS_P (distance)
7804 && !(can_throw_internal (insn) || can_throw_external (insn)))
7806 xoperands[1] = gen_label_rtx ();
7807 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7808 targetm.asm_out.internal_label (asm_out_file, "L",
7809 CODE_LABEL_NUMBER (xoperands[1]));
7812 output_asm_insn ("nop\n\tb,n %0", xoperands);
7815 output_asm_insn ("b,n %0", xoperands);
7817 /* Delete the jump. */
7818 SET_INSN_DELETED (NEXT_INSN (insn));
7823 /* Return the attribute length of the indirect call instruction INSN.
7824 The length must match the code generated by output_indirect call.
7825 The returned length includes the delay slot. Currently, the delay
7826 slot of an indirect call sequence is not exposed and it is used by
7827 the sequence itself. */
7830 attr_length_indirect_call (rtx insn)
7832 unsigned long distance = -1;
7833 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7835 if (INSN_ADDRESSES_SET_P ())
7837 distance = (total + insn_current_reference_address (insn));
7838 if (distance < total)
7845 if (TARGET_FAST_INDIRECT_CALLS
7846 || (!TARGET_PORTABLE_RUNTIME
7847 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7848 || distance < 240000)))
7854 if (TARGET_PORTABLE_RUNTIME)
7857 /* Out of reach, can use ble. */
7862 output_indirect_call (rtx insn, rtx call_dest)
7868 xoperands[0] = call_dest;
7869 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7870 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7874 /* First the special case for kernels, level 0 systems, etc. */
7875 if (TARGET_FAST_INDIRECT_CALLS)
7876 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7878 /* Now the normal case -- we can reach $$dyncall directly or
7879 we're sure that we can get there via a long-branch stub.
7881 No need to check target flags as the length uniquely identifies
7882 the remaining cases. */
7883 if (attr_length_indirect_call (insn) == 8)
7885 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7886 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7887 variant of the B,L instruction can't be used on the SOM target. */
7888 if (TARGET_PA_20 && !TARGET_SOM)
7889 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7891 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7894 /* Long millicode call, but we are not generating PIC or portable runtime
7896 if (attr_length_indirect_call (insn) == 12)
7897 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7899 /* Long millicode call for portable runtime. */
7900 if (attr_length_indirect_call (insn) == 20)
7901 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7903 /* We need a long PIC call to $$dyncall. */
7904 xoperands[0] = NULL_RTX;
7905 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7906 if (TARGET_SOM || !TARGET_GAS)
7908 xoperands[0] = gen_label_rtx ();
7909 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7910 targetm.asm_out.internal_label (asm_out_file, "L",
7911 CODE_LABEL_NUMBER (xoperands[0]));
7912 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7916 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7917 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7920 output_asm_insn ("blr %%r0,%%r2", xoperands);
7921 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7925 /* Return the total length of the save and restore instructions needed for
7926 the data linkage table pointer (i.e., the PIC register) across the call
7927 instruction INSN. No-return calls do not require a save and restore.
7928 In addition, we may be able to avoid the save and restore for calls
7929 within the same translation unit. */
7932 attr_length_save_restore_dltp (rtx insn)
7934 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7940 /* In HPUX 8.0's shared library scheme, special relocations are needed
7941 for function labels if they might be passed to a function
7942 in a shared library (because shared libraries don't live in code
7943 space), and special magic is needed to construct their address. */
7946 hppa_encode_label (rtx sym)
7948 const char *str = XSTR (sym, 0);
7949 int len = strlen (str) + 1;
7952 p = newstr = XALLOCAVEC (char, len + 1);
7956 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7960 pa_encode_section_info (tree decl, rtx rtl, int first)
7962 int old_referenced = 0;
7964 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7966 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7968 default_encode_section_info (decl, rtl, first);
7970 if (first && TEXT_SPACE_P (decl))
7972 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7973 if (TREE_CODE (decl) == FUNCTION_DECL)
7974 hppa_encode_label (XEXP (rtl, 0));
7976 else if (old_referenced)
7977 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7980 /* This is sort of inverse to pa_encode_section_info. */
7983 pa_strip_name_encoding (const char *str)
7985 str += (*str == '@');
7986 str += (*str == '*');
7991 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7993 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7996 /* Returns 1 if OP is a function label involved in a simple addition
7997 with a constant. Used to keep certain patterns from matching
7998 during instruction combination. */
8000 is_function_label_plus_const (rtx op)
8002 /* Strip off any CONST. */
8003 if (GET_CODE (op) == CONST)
8006 return (GET_CODE (op) == PLUS
8007 && function_label_operand (XEXP (op, 0), Pmode)
8008 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8011 /* Output assembly code for a thunk to FUNCTION. */
8014 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8015 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8018 static unsigned int current_thunk_number;
8019 int val_14 = VAL_14_BITS_P (delta);
8020 unsigned int old_last_address = last_address, nbytes = 0;
8024 xoperands[0] = XEXP (DECL_RTL (function), 0);
8025 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8026 xoperands[2] = GEN_INT (delta);
8028 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8029 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8031 /* Output the thunk. We know that the function is in the same
8032 translation unit (i.e., the same space) as the thunk, and that
8033 thunks are output after their method. Thus, we don't need an
8034 external branch to reach the function. With SOM and GAS,
8035 functions and thunks are effectively in different sections.
8036 Thus, we can always use a IA-relative branch and the linker
8037 will add a long branch stub if necessary.
8039 However, we have to be careful when generating PIC code on the
8040 SOM port to ensure that the sequence does not transfer to an
8041 import stub for the target function as this could clobber the
8042 return value saved at SP-24. This would also apply to the
8043 32-bit linux port if the multi-space model is implemented. */
8044 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8045 && !(flag_pic && TREE_PUBLIC (function))
8046 && (TARGET_GAS || last_address < 262132))
8047 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8048 && ((targetm.have_named_sections
8049 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8050 /* The GNU 64-bit linker has rather poor stub management.
8051 So, we use a long branch from thunks that aren't in
8052 the same section as the target function. */
8054 && (DECL_SECTION_NAME (thunk_fndecl)
8055 != DECL_SECTION_NAME (function)))
8056 || ((DECL_SECTION_NAME (thunk_fndecl)
8057 == DECL_SECTION_NAME (function))
8058 && last_address < 262132)))
8059 || (targetm.have_named_sections
8060 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8061 && DECL_SECTION_NAME (function) == NULL
8062 && last_address < 262132)
8063 || (!targetm.have_named_sections && last_address < 262132))))
8066 output_asm_insn ("addil L'%2,%%r26", xoperands);
8068 output_asm_insn ("b %0", xoperands);
8072 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8077 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8081 else if (TARGET_64BIT)
8083 /* We only have one call-clobbered scratch register, so we can't
8084 make use of the delay slot if delta doesn't fit in 14 bits. */
8087 output_asm_insn ("addil L'%2,%%r26", xoperands);
8088 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8091 output_asm_insn ("b,l .+8,%%r1", xoperands);
8095 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8096 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8100 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8101 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8106 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8107 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8112 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8116 else if (TARGET_PORTABLE_RUNTIME)
8118 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8119 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8122 output_asm_insn ("addil L'%2,%%r26", xoperands);
8124 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8128 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8133 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8137 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8139 /* The function is accessible from outside this module. The only
8140 way to avoid an import stub between the thunk and function is to
8141 call the function directly with an indirect sequence similar to
8142 that used by $$dyncall. This is possible because $$dyncall acts
8143 as the import stub in an indirect call. */
8144 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8145 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8146 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8147 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8148 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8149 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8150 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8151 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8152 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8156 output_asm_insn ("addil L'%2,%%r26", xoperands);
8162 output_asm_insn ("bve (%%r22)", xoperands);
8165 else if (TARGET_NO_SPACE_REGS)
8167 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8172 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8173 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8174 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8179 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8181 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8185 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8187 if (TARGET_SOM || !TARGET_GAS)
8189 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8190 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8194 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8195 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8199 output_asm_insn ("addil L'%2,%%r26", xoperands);
8201 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8205 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8210 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8217 output_asm_insn ("addil L'%2,%%r26", xoperands);
8219 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8220 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8224 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8229 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8234 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8236 if (TARGET_SOM && TARGET_GAS)
8238 /* We done with this subspace except possibly for some additional
8239 debug information. Forget that we are in this subspace to ensure
8240 that the next function is output in its own subspace. */
8242 cfun->machine->in_nsubspa = 2;
8245 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8247 switch_to_section (data_section);
8248 output_asm_insn (".align 4", xoperands);
8249 ASM_OUTPUT_LABEL (file, label);
8250 output_asm_insn (".word P'%0", xoperands);
8253 current_thunk_number++;
8254 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8255 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8256 last_address += nbytes;
8257 if (old_last_address > last_address)
8258 last_address = UINT_MAX;
8259 update_total_code_bytes (nbytes);
8262 /* Only direct calls to static functions are allowed to be sibling (tail)
8265 This restriction is necessary because some linker generated stubs will
8266 store return pointers into rp' in some cases which might clobber a
8267 live value already in rp'.
8269 In a sibcall the current function and the target function share stack
8270 space. Thus if the path to the current function and the path to the
8271 target function save a value in rp', they save the value into the
8272 same stack slot, which has undesirable consequences.
8274 Because of the deferred binding nature of shared libraries any function
8275 with external scope could be in a different load module and thus require
8276 rp' to be saved when calling that function. So sibcall optimizations
8277 can only be safe for static function.
8279 Note that GCC never needs return value relocations, so we don't have to
8280 worry about static calls with return value relocations (which require
8283 It is safe to perform a sibcall optimization when the target function
8284 will never return. */
8286 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8288 if (TARGET_PORTABLE_RUNTIME)
8291 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8292 single subspace mode and the call is not indirect. As far as I know,
8293 there is no operating system support for the multiple subspace mode.
8294 It might be possible to support indirect calls if we didn't use
8295 $$dyncall (see the indirect sequence generated in output_call). */
8297 return (decl != NULL_TREE);
8299 /* Sibcalls are not ok because the arg pointer register is not a fixed
8300 register. This prevents the sibcall optimization from occurring. In
8301 addition, there are problems with stub placement using GNU ld. This
8302 is because a normal sibcall branch uses a 17-bit relocation while
8303 a regular call branch uses a 22-bit relocation. As a result, more
8304 care needs to be taken in the placement of long-branch stubs. */
8308 /* Sibcalls are only ok within a translation unit. */
8309 return (decl && !TREE_PUBLIC (decl));
8312 /* ??? Addition is not commutative on the PA due to the weird implicit
8313 space register selection rules for memory addresses. Therefore, we
8314 don't consider a + b == b + a, as this might be inside a MEM. */
8316 pa_commutative_p (const_rtx x, int outer_code)
8318 return (COMMUTATIVE_P (x)
8319 && (TARGET_NO_SPACE_REGS
8320 || (outer_code != UNKNOWN && outer_code != MEM)
8321 || GET_CODE (x) != PLUS));
8324 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8325 use in fmpyadd instructions. */
8327 fmpyaddoperands (rtx *operands)
8329 enum machine_mode mode = GET_MODE (operands[0]);
8331 /* Must be a floating point mode. */
8332 if (mode != SFmode && mode != DFmode)
8335 /* All modes must be the same. */
8336 if (! (mode == GET_MODE (operands[1])
8337 && mode == GET_MODE (operands[2])
8338 && mode == GET_MODE (operands[3])
8339 && mode == GET_MODE (operands[4])
8340 && mode == GET_MODE (operands[5])))
8343 /* All operands must be registers. */
8344 if (! (GET_CODE (operands[1]) == REG
8345 && GET_CODE (operands[2]) == REG
8346 && GET_CODE (operands[3]) == REG
8347 && GET_CODE (operands[4]) == REG
8348 && GET_CODE (operands[5]) == REG))
8351 /* Only 2 real operands to the addition. One of the input operands must
8352 be the same as the output operand. */
8353 if (! rtx_equal_p (operands[3], operands[4])
8354 && ! rtx_equal_p (operands[3], operands[5]))
8357 /* Inout operand of add cannot conflict with any operands from multiply. */
8358 if (rtx_equal_p (operands[3], operands[0])
8359 || rtx_equal_p (operands[3], operands[1])
8360 || rtx_equal_p (operands[3], operands[2]))
8363 /* multiply cannot feed into addition operands. */
8364 if (rtx_equal_p (operands[4], operands[0])
8365 || rtx_equal_p (operands[5], operands[0]))
8368 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8370 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8371 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8372 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8373 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8374 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8375 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8378 /* Passed. Operands are suitable for fmpyadd. */
8382 #if !defined(USE_COLLECT2)
8384 pa_asm_out_constructor (rtx symbol, int priority)
8386 if (!function_label_operand (symbol, VOIDmode))
8387 hppa_encode_label (symbol);
8389 #ifdef CTORS_SECTION_ASM_OP
8390 default_ctor_section_asm_out_constructor (symbol, priority);
8392 # ifdef TARGET_ASM_NAMED_SECTION
8393 default_named_section_asm_out_constructor (symbol, priority);
8395 default_stabs_asm_out_constructor (symbol, priority);
8401 pa_asm_out_destructor (rtx symbol, int priority)
8403 if (!function_label_operand (symbol, VOIDmode))
8404 hppa_encode_label (symbol);
8406 #ifdef DTORS_SECTION_ASM_OP
8407 default_dtor_section_asm_out_destructor (symbol, priority);
8409 # ifdef TARGET_ASM_NAMED_SECTION
8410 default_named_section_asm_out_destructor (symbol, priority);
8412 default_stabs_asm_out_destructor (symbol, priority);
8418 /* This function places uninitialized global data in the bss section.
8419 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8420 function on the SOM port to prevent uninitialized global data from
8421 being placed in the data section. */
8424 pa_asm_output_aligned_bss (FILE *stream,
8426 unsigned HOST_WIDE_INT size,
8429 switch_to_section (bss_section);
8430 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8432 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8433 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8436 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8437 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8440 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8441 ASM_OUTPUT_LABEL (stream, name);
8442 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8445 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8446 that doesn't allow the alignment of global common storage to be directly
8447 specified. The SOM linker aligns common storage based on the rounded
8448 value of the NUM_BYTES parameter in the .comm directive. It's not
8449 possible to use the .align directive as it doesn't affect the alignment
8450 of the label associated with a .comm directive. */
8453 pa_asm_output_aligned_common (FILE *stream,
8455 unsigned HOST_WIDE_INT size,
8458 unsigned int max_common_align;
8460 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8461 if (align > max_common_align)
8463 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8464 "for global common data. Using %u",
8465 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8466 align = max_common_align;
8469 switch_to_section (bss_section);
8471 assemble_name (stream, name);
8472 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8473 MAX (size, align / BITS_PER_UNIT));
8476 /* We can't use .comm for local common storage as the SOM linker effectively
8477 treats the symbol as universal and uses the same storage for local symbols
8478 with the same name in different object files. The .block directive
8479 reserves an uninitialized block of storage. However, it's not common
8480 storage. Fortunately, GCC never requests common storage with the same
8481 name in any given translation unit. */
8484 pa_asm_output_aligned_local (FILE *stream,
8486 unsigned HOST_WIDE_INT size,
8489 switch_to_section (bss_section);
8490 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8493 fprintf (stream, "%s", LOCAL_ASM_OP);
8494 assemble_name (stream, name);
8495 fprintf (stream, "\n");
8498 ASM_OUTPUT_LABEL (stream, name);
8499 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8502 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8503 use in fmpysub instructions. */
8505 fmpysuboperands (rtx *operands)
8507 enum machine_mode mode = GET_MODE (operands[0]);
8509 /* Must be a floating point mode. */
8510 if (mode != SFmode && mode != DFmode)
8513 /* All modes must be the same. */
8514 if (! (mode == GET_MODE (operands[1])
8515 && mode == GET_MODE (operands[2])
8516 && mode == GET_MODE (operands[3])
8517 && mode == GET_MODE (operands[4])
8518 && mode == GET_MODE (operands[5])))
8521 /* All operands must be registers. */
8522 if (! (GET_CODE (operands[1]) == REG
8523 && GET_CODE (operands[2]) == REG
8524 && GET_CODE (operands[3]) == REG
8525 && GET_CODE (operands[4]) == REG
8526 && GET_CODE (operands[5]) == REG))
8529 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8530 operation, so operands[4] must be the same as operand[3]. */
8531 if (! rtx_equal_p (operands[3], operands[4]))
8534 /* multiply cannot feed into subtraction. */
8535 if (rtx_equal_p (operands[5], operands[0]))
8538 /* Inout operand of sub cannot conflict with any operands from multiply. */
8539 if (rtx_equal_p (operands[3], operands[0])
8540 || rtx_equal_p (operands[3], operands[1])
8541 || rtx_equal_p (operands[3], operands[2]))
8544 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8546 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8547 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8548 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8549 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8550 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8551 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8554 /* Passed. Operands are suitable for fmpysub. */
8558 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8559 constants for shadd instructions. */
8561 shadd_constant_p (int val)
8563 if (val == 2 || val == 4 || val == 8)
8569 /* Return 1 if OP is valid as a base or index register in a
8573 borx_reg_operand (rtx op, enum machine_mode mode)
8575 if (GET_CODE (op) != REG)
8578 /* We must reject virtual registers as the only expressions that
8579 can be instantiated are REG and REG+CONST. */
8580 if (op == virtual_incoming_args_rtx
8581 || op == virtual_stack_vars_rtx
8582 || op == virtual_stack_dynamic_rtx
8583 || op == virtual_outgoing_args_rtx
8584 || op == virtual_cfa_rtx)
8587 /* While it's always safe to index off the frame pointer, it's not
8588 profitable to do so when the frame pointer is being eliminated. */
8589 if (!reload_completed
8590 && flag_omit_frame_pointer
8591 && !cfun->calls_alloca
8592 && op == frame_pointer_rtx)
8595 return register_operand (op, mode);
8598 /* Return 1 if this operand is anything other than a hard register. */
8601 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8603 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8606 /* Return TRUE if INSN branches forward. */
8609 forward_branch_p (rtx insn)
8611 rtx lab = JUMP_LABEL (insn);
8613 /* The INSN must have a jump label. */
8614 gcc_assert (lab != NULL_RTX);
8616 if (INSN_ADDRESSES_SET_P ())
8617 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8624 insn = NEXT_INSN (insn);
8630 /* Return 1 if OP is an equality comparison, else return 0. */
8632 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8634 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8637 /* Return 1 if INSN is in the delay slot of a call instruction. */
8639 jump_in_call_delay (rtx insn)
8642 if (GET_CODE (insn) != JUMP_INSN)
8645 if (PREV_INSN (insn)
8646 && PREV_INSN (PREV_INSN (insn))
8647 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8649 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8651 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8652 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8659 /* Output an unconditional move and branch insn. */
8662 output_parallel_movb (rtx *operands, rtx insn)
8664 int length = get_attr_length (insn);
8666 /* These are the cases in which we win. */
8668 return "mov%I1b,tr %1,%0,%2";
8670 /* None of the following cases win, but they don't lose either. */
8673 if (dbr_sequence_length () == 0)
8675 /* Nothing in the delay slot, fake it by putting the combined
8676 insn (the copy or add) in the delay slot of a bl. */
8677 if (GET_CODE (operands[1]) == CONST_INT)
8678 return "b %2\n\tldi %1,%0";
8680 return "b %2\n\tcopy %1,%0";
8684 /* Something in the delay slot, but we've got a long branch. */
8685 if (GET_CODE (operands[1]) == CONST_INT)
8686 return "ldi %1,%0\n\tb %2";
8688 return "copy %1,%0\n\tb %2";
8692 if (GET_CODE (operands[1]) == CONST_INT)
8693 output_asm_insn ("ldi %1,%0", operands);
8695 output_asm_insn ("copy %1,%0", operands);
8696 return output_lbranch (operands[2], insn, 1);
8699 /* Output an unconditional add and branch insn. */
8702 output_parallel_addb (rtx *operands, rtx insn)
8704 int length = get_attr_length (insn);
8706 /* To make life easy we want operand0 to be the shared input/output
8707 operand and operand1 to be the readonly operand. */
8708 if (operands[0] == operands[1])
8709 operands[1] = operands[2];
8711 /* These are the cases in which we win. */
8713 return "add%I1b,tr %1,%0,%3";
8715 /* None of the following cases win, but they don't lose either. */
8718 if (dbr_sequence_length () == 0)
8719 /* Nothing in the delay slot, fake it by putting the combined
8720 insn (the copy or add) in the delay slot of a bl. */
8721 return "b %3\n\tadd%I1 %1,%0,%0";
8723 /* Something in the delay slot, but we've got a long branch. */
8724 return "add%I1 %1,%0,%0\n\tb %3";
8727 output_asm_insn ("add%I1 %1,%0,%0", operands);
8728 return output_lbranch (operands[3], insn, 1);
8731 /* Return nonzero if INSN (a jump insn) immediately follows a call
8732 to a named function. This is used to avoid filling the delay slot
8733 of the jump since it can usually be eliminated by modifying RP in
8734 the delay slot of the call. */
8737 following_call (rtx insn)
8739 if (! TARGET_JUMP_IN_DELAY)
8742 /* Find the previous real insn, skipping NOTEs. */
8743 insn = PREV_INSN (insn);
8744 while (insn && GET_CODE (insn) == NOTE)
8745 insn = PREV_INSN (insn);
8747 /* Check for CALL_INSNs and millicode calls. */
8749 && ((GET_CODE (insn) == CALL_INSN
8750 && get_attr_type (insn) != TYPE_DYNCALL)
8751 || (GET_CODE (insn) == INSN
8752 && GET_CODE (PATTERN (insn)) != SEQUENCE
8753 && GET_CODE (PATTERN (insn)) != USE
8754 && GET_CODE (PATTERN (insn)) != CLOBBER
8755 && get_attr_type (insn) == TYPE_MILLI)))
8761 /* We use this hook to perform a PA specific optimization which is difficult
8762 to do in earlier passes.
8764 We want the delay slots of branches within jump tables to be filled.
8765 None of the compiler passes at the moment even has the notion that a
8766 PA jump table doesn't contain addresses, but instead contains actual
8769 Because we actually jump into the table, the addresses of each entry
8770 must stay constant in relation to the beginning of the table (which
8771 itself must stay constant relative to the instruction to jump into
8772 it). I don't believe we can guarantee earlier passes of the compiler
8773 will adhere to those rules.
8775 So, late in the compilation process we find all the jump tables, and
8776 expand them into real code -- e.g. each entry in the jump table vector
8777 will get an appropriate label followed by a jump to the final target.
8779 Reorg and the final jump pass can then optimize these branches and
8780 fill their delay slots. We end up with smaller, more efficient code.
8782 The jump instructions within the table are special; we must be able
8783 to identify them during assembly output (if the jumps don't get filled
8784 we need to emit a nop rather than nullifying the delay slot)). We
8785 identify jumps in switch tables by using insns with the attribute
8786 type TYPE_BTABLE_BRANCH.
8788 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8789 insns. This serves two purposes, first it prevents jump.c from
8790 noticing that the last N entries in the table jump to the instruction
8791 immediately after the table and deleting the jumps. Second, those
8792 insns mark where we should emit .begin_brtab and .end_brtab directives
8793 when using GAS (allows for better link time optimizations). */
8800 remove_useless_addtr_insns (1);
8802 if (pa_cpu < PROCESSOR_8000)
8803 pa_combine_instructions ();
8806 /* This is fairly cheap, so always run it if optimizing. */
8807 if (optimize > 0 && !TARGET_BIG_SWITCH)
8809 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8810 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8812 rtx pattern, tmp, location, label;
8813 unsigned int length, i;
8815 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8816 if (GET_CODE (insn) != JUMP_INSN
8817 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8818 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8821 /* Emit marker for the beginning of the branch table. */
8822 emit_insn_before (gen_begin_brtab (), insn);
8824 pattern = PATTERN (insn);
8825 location = PREV_INSN (insn);
8826 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8828 for (i = 0; i < length; i++)
8830 /* Emit a label before each jump to keep jump.c from
8831 removing this code. */
8832 tmp = gen_label_rtx ();
8833 LABEL_NUSES (tmp) = 1;
8834 emit_label_after (tmp, location);
8835 location = NEXT_INSN (location);
8837 if (GET_CODE (pattern) == ADDR_VEC)
8838 label = XEXP (XVECEXP (pattern, 0, i), 0);
8840 label = XEXP (XVECEXP (pattern, 1, i), 0);
8842 tmp = gen_short_jump (label);
8844 /* Emit the jump itself. */
8845 tmp = emit_jump_insn_after (tmp, location);
8846 JUMP_LABEL (tmp) = label;
8847 LABEL_NUSES (label)++;
8848 location = NEXT_INSN (location);
8850 /* Emit a BARRIER after the jump. */
8851 emit_barrier_after (location);
8852 location = NEXT_INSN (location);
8855 /* Emit marker for the end of the branch table. */
8856 emit_insn_before (gen_end_brtab (), location);
8857 location = NEXT_INSN (location);
8858 emit_barrier_after (location);
8860 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8866 /* Still need brtab marker insns. FIXME: the presence of these
8867 markers disables output of the branch table to readonly memory,
8868 and any alignment directives that might be needed. Possibly,
8869 the begin_brtab insn should be output before the label for the
8870 table. This doesn't matter at the moment since the tables are
8871 always output in the text section. */
8872 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8874 /* Find an ADDR_VEC insn. */
8875 if (GET_CODE (insn) != JUMP_INSN
8876 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8877 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8880 /* Now generate markers for the beginning and end of the
8882 emit_insn_before (gen_begin_brtab (), insn);
8883 emit_insn_after (gen_end_brtab (), insn);
8888 /* The PA has a number of odd instructions which can perform multiple
8889 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8890 it may be profitable to combine two instructions into one instruction
8891 with two outputs. It's not profitable PA2.0 machines because the
8892 two outputs would take two slots in the reorder buffers.
8894 This routine finds instructions which can be combined and combines
8895 them. We only support some of the potential combinations, and we
8896 only try common ways to find suitable instructions.
8898 * addb can add two registers or a register and a small integer
8899 and jump to a nearby (+-8k) location. Normally the jump to the
8900 nearby location is conditional on the result of the add, but by
8901 using the "true" condition we can make the jump unconditional.
8902 Thus addb can perform two independent operations in one insn.
8904 * movb is similar to addb in that it can perform a reg->reg
8905 or small immediate->reg copy and jump to a nearby (+-8k location).
8907 * fmpyadd and fmpysub can perform a FP multiply and either an
8908 FP add or FP sub if the operands of the multiply and add/sub are
8909 independent (there are other minor restrictions). Note both
8910 the fmpy and fadd/fsub can in theory move to better spots according
8911 to data dependencies, but for now we require the fmpy stay at a
8914 * Many of the memory operations can perform pre & post updates
8915 of index registers. GCC's pre/post increment/decrement addressing
8916 is far too simple to take advantage of all the possibilities. This
8917 pass may not be suitable since those insns may not be independent.
8919 * comclr can compare two ints or an int and a register, nullify
8920 the following instruction and zero some other register. This
8921 is more difficult to use as it's harder to find an insn which
8922 will generate a comclr than finding something like an unconditional
8923 branch. (conditional moves & long branches create comclr insns).
8925 * Most arithmetic operations can conditionally skip the next
8926 instruction. They can be viewed as "perform this operation
8927 and conditionally jump to this nearby location" (where nearby
8928 is an insns away). These are difficult to use due to the
8929 branch length restrictions. */
8932 pa_combine_instructions (void)
8934 rtx anchor, new_rtx;
8936 /* This can get expensive since the basic algorithm is on the
8937 order of O(n^2) (or worse). Only do it for -O2 or higher
8938 levels of optimization. */
8942 /* Walk down the list of insns looking for "anchor" insns which
8943 may be combined with "floating" insns. As the name implies,
8944 "anchor" instructions don't move, while "floating" insns may
8946 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8947 new_rtx = make_insn_raw (new_rtx);
8949 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8951 enum attr_pa_combine_type anchor_attr;
8952 enum attr_pa_combine_type floater_attr;
8954 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8955 Also ignore any special USE insns. */
8956 if ((GET_CODE (anchor) != INSN
8957 && GET_CODE (anchor) != JUMP_INSN
8958 && GET_CODE (anchor) != CALL_INSN)
8959 || GET_CODE (PATTERN (anchor)) == USE
8960 || GET_CODE (PATTERN (anchor)) == CLOBBER
8961 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8962 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8965 anchor_attr = get_attr_pa_combine_type (anchor);
8966 /* See if anchor is an insn suitable for combination. */
8967 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8968 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8969 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8970 && ! forward_branch_p (anchor)))
8974 for (floater = PREV_INSN (anchor);
8976 floater = PREV_INSN (floater))
8978 if (GET_CODE (floater) == NOTE
8979 || (GET_CODE (floater) == INSN
8980 && (GET_CODE (PATTERN (floater)) == USE
8981 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8984 /* Anything except a regular INSN will stop our search. */
8985 if (GET_CODE (floater) != INSN
8986 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8987 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8993 /* See if FLOATER is suitable for combination with the
8995 floater_attr = get_attr_pa_combine_type (floater);
8996 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8997 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8998 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8999 && floater_attr == PA_COMBINE_TYPE_FMPY))
9001 /* If ANCHOR and FLOATER can be combined, then we're
9002 done with this pass. */
9003 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9004 SET_DEST (PATTERN (floater)),
9005 XEXP (SET_SRC (PATTERN (floater)), 0),
9006 XEXP (SET_SRC (PATTERN (floater)), 1)))
9010 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9011 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9013 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9015 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9016 SET_DEST (PATTERN (floater)),
9017 XEXP (SET_SRC (PATTERN (floater)), 0),
9018 XEXP (SET_SRC (PATTERN (floater)), 1)))
9023 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9024 SET_DEST (PATTERN (floater)),
9025 SET_SRC (PATTERN (floater)),
9026 SET_SRC (PATTERN (floater))))
9032 /* If we didn't find anything on the backwards scan try forwards. */
9034 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9035 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9037 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9039 if (GET_CODE (floater) == NOTE
9040 || (GET_CODE (floater) == INSN
9041 && (GET_CODE (PATTERN (floater)) == USE
9042 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9046 /* Anything except a regular INSN will stop our search. */
9047 if (GET_CODE (floater) != INSN
9048 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9049 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9055 /* See if FLOATER is suitable for combination with the
9057 floater_attr = get_attr_pa_combine_type (floater);
9058 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9059 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9060 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9061 && floater_attr == PA_COMBINE_TYPE_FMPY))
9063 /* If ANCHOR and FLOATER can be combined, then we're
9064 done with this pass. */
9065 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9066 SET_DEST (PATTERN (floater)),
9067 XEXP (SET_SRC (PATTERN (floater)),
9069 XEXP (SET_SRC (PATTERN (floater)),
9076 /* FLOATER will be nonzero if we found a suitable floating
9077 insn for combination with ANCHOR. */
9079 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9080 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9082 /* Emit the new instruction and delete the old anchor. */
9083 emit_insn_before (gen_rtx_PARALLEL
9085 gen_rtvec (2, PATTERN (anchor),
9086 PATTERN (floater))),
9089 SET_INSN_DELETED (anchor);
9091 /* Emit a special USE insn for FLOATER, then delete
9092 the floating insn. */
9093 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9094 delete_insn (floater);
9099 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9102 /* Emit the new_jump instruction and delete the old anchor. */
9104 = emit_jump_insn_before (gen_rtx_PARALLEL
9106 gen_rtvec (2, PATTERN (anchor),
9107 PATTERN (floater))),
9110 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9111 SET_INSN_DELETED (anchor);
9113 /* Emit a special USE insn for FLOATER, then delete
9114 the floating insn. */
9115 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9116 delete_insn (floater);
9124 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9127 int insn_code_number;
9130 /* Create a PARALLEL with the patterns of ANCHOR and
9131 FLOATER, try to recognize it, then test constraints
9132 for the resulting pattern.
9134 If the pattern doesn't match or the constraints
9135 aren't met keep searching for a suitable floater
9137 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9138 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9139 INSN_CODE (new_rtx) = -1;
9140 insn_code_number = recog_memoized (new_rtx);
9141 if (insn_code_number < 0
9142 || (extract_insn (new_rtx), ! constrain_operands (1)))
9156 /* There's up to three operands to consider. One
9157 output and two inputs.
9159 The output must not be used between FLOATER & ANCHOR
9160 exclusive. The inputs must not be set between
9161 FLOATER and ANCHOR exclusive. */
9163 if (reg_used_between_p (dest, start, end))
9166 if (reg_set_between_p (src1, start, end))
9169 if (reg_set_between_p (src2, start, end))
9172 /* If we get here, then everything is good. */
9176 /* Return nonzero if references for INSN are delayed.
9178 Millicode insns are actually function calls with some special
9179 constraints on arguments and register usage.
9181 Millicode calls always expect their arguments in the integer argument
9182 registers, and always return their result in %r29 (ret1). They
9183 are expected to clobber their arguments, %r1, %r29, and the return
9184 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9186 This function tells reorg that the references to arguments and
9187 millicode calls do not appear to happen until after the millicode call.
9188 This allows reorg to put insns which set the argument registers into the
9189 delay slot of the millicode call -- thus they act more like traditional
9192 Note we cannot consider side effects of the insn to be delayed because
9193 the branch and link insn will clobber the return pointer. If we happened
9194 to use the return pointer in the delay slot of the call, then we lose.
9196 get_attr_type will try to recognize the given insn, so make sure to
9197 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9200 insn_refs_are_delayed (rtx insn)
9202 return ((GET_CODE (insn) == INSN
9203 && GET_CODE (PATTERN (insn)) != SEQUENCE
9204 && GET_CODE (PATTERN (insn)) != USE
9205 && GET_CODE (PATTERN (insn)) != CLOBBER
9206 && get_attr_type (insn) == TYPE_MILLI));
9209 /* Promote the return value, but not the arguments. */
9211 static enum machine_mode
9212 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9213 enum machine_mode mode,
9214 int *punsignedp ATTRIBUTE_UNUSED,
9215 const_tree fntype ATTRIBUTE_UNUSED,
9218 if (for_return == 0)
9220 return promote_mode (type, mode, punsignedp);
9223 /* On the HP-PA the value is found in register(s) 28(-29), unless
9224 the mode is SF or DF. Then the value is returned in fr4 (32).
9226 This must perform the same promotions as PROMOTE_MODE, else promoting
9227 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9229 Small structures must be returned in a PARALLEL on PA64 in order
9230 to match the HP Compiler ABI. */
9233 pa_function_value (const_tree valtype,
9234 const_tree func ATTRIBUTE_UNUSED,
9235 bool outgoing ATTRIBUTE_UNUSED)
9237 enum machine_mode valmode;
9239 if (AGGREGATE_TYPE_P (valtype)
9240 || TREE_CODE (valtype) == COMPLEX_TYPE
9241 || TREE_CODE (valtype) == VECTOR_TYPE)
9245 /* Aggregates with a size less than or equal to 128 bits are
9246 returned in GR 28(-29). They are left justified. The pad
9247 bits are undefined. Larger aggregates are returned in
9251 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9253 for (i = 0; i < ub; i++)
9255 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9256 gen_rtx_REG (DImode, 28 + i),
9261 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9263 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9265 /* Aggregates 5 to 8 bytes in size are returned in general
9266 registers r28-r29 in the same manner as other non
9267 floating-point objects. The data is right-justified and
9268 zero-extended to 64 bits. This is opposite to the normal
9269 justification used on big endian targets and requires
9270 special treatment. */
9271 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9272 gen_rtx_REG (DImode, 28), const0_rtx);
9273 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9277 if ((INTEGRAL_TYPE_P (valtype)
9278 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9279 || POINTER_TYPE_P (valtype))
9280 valmode = word_mode;
9282 valmode = TYPE_MODE (valtype);
9284 if (TREE_CODE (valtype) == REAL_TYPE
9285 && !AGGREGATE_TYPE_P (valtype)
9286 && TYPE_MODE (valtype) != TFmode
9287 && !TARGET_SOFT_FLOAT)
9288 return gen_rtx_REG (valmode, 32);
9290 return gen_rtx_REG (valmode, 28);
9293 /* Return the location of a parameter that is passed in a register or NULL
9294 if the parameter has any component that is passed in memory.
9296 This is new code and will be pushed to into the net sources after
9299 ??? We might want to restructure this so that it looks more like other
9302 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9303 int named ATTRIBUTE_UNUSED)
9305 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9312 if (mode == VOIDmode)
9315 arg_size = FUNCTION_ARG_SIZE (mode, type);
9317 /* If this arg would be passed partially or totally on the stack, then
9318 this routine should return zero. pa_arg_partial_bytes will
9319 handle arguments which are split between regs and stack slots if
9320 the ABI mandates split arguments. */
9323 /* The 32-bit ABI does not split arguments. */
9324 if (cum->words + arg_size > max_arg_words)
9330 alignment = cum->words & 1;
9331 if (cum->words + alignment >= max_arg_words)
9335 /* The 32bit ABIs and the 64bit ABIs are rather different,
9336 particularly in their handling of FP registers. We might
9337 be able to cleverly share code between them, but I'm not
9338 going to bother in the hope that splitting them up results
9339 in code that is more easily understood. */
9343 /* Advance the base registers to their current locations.
9345 Remember, gprs grow towards smaller register numbers while
9346 fprs grow to higher register numbers. Also remember that
9347 although FP regs are 32-bit addressable, we pretend that
9348 the registers are 64-bits wide. */
9349 gpr_reg_base = 26 - cum->words;
9350 fpr_reg_base = 32 + cum->words;
9352 /* Arguments wider than one word and small aggregates need special
9356 || (type && (AGGREGATE_TYPE_P (type)
9357 || TREE_CODE (type) == COMPLEX_TYPE
9358 || TREE_CODE (type) == VECTOR_TYPE)))
9360 /* Double-extended precision (80-bit), quad-precision (128-bit)
9361 and aggregates including complex numbers are aligned on
9362 128-bit boundaries. The first eight 64-bit argument slots
9363 are associated one-to-one, with general registers r26
9364 through r19, and also with floating-point registers fr4
9365 through fr11. Arguments larger than one word are always
9366 passed in general registers.
9368 Using a PARALLEL with a word mode register results in left
9369 justified data on a big-endian target. */
9372 int i, offset = 0, ub = arg_size;
9374 /* Align the base register. */
9375 gpr_reg_base -= alignment;
9377 ub = MIN (ub, max_arg_words - cum->words - alignment);
9378 for (i = 0; i < ub; i++)
9380 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9381 gen_rtx_REG (DImode, gpr_reg_base),
9387 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9392 /* If the argument is larger than a word, then we know precisely
9393 which registers we must use. */
9407 /* Structures 5 to 8 bytes in size are passed in the general
9408 registers in the same manner as other non floating-point
9409 objects. The data is right-justified and zero-extended
9410 to 64 bits. This is opposite to the normal justification
9411 used on big endian targets and requires special treatment.
9412 We now define BLOCK_REG_PADDING to pad these objects.
9413 Aggregates, complex and vector types are passed in the same
9414 manner as structures. */
9416 || (type && (AGGREGATE_TYPE_P (type)
9417 || TREE_CODE (type) == COMPLEX_TYPE
9418 || TREE_CODE (type) == VECTOR_TYPE)))
9420 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9421 gen_rtx_REG (DImode, gpr_reg_base),
9423 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9428 /* We have a single word (32 bits). A simple computation
9429 will get us the register #s we need. */
9430 gpr_reg_base = 26 - cum->words;
9431 fpr_reg_base = 32 + 2 * cum->words;
9435 /* Determine if the argument needs to be passed in both general and
9436 floating point registers. */
9437 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9438 /* If we are doing soft-float with portable runtime, then there
9439 is no need to worry about FP regs. */
9440 && !TARGET_SOFT_FLOAT
9441 /* The parameter must be some kind of scalar float, else we just
9442 pass it in integer registers. */
9443 && GET_MODE_CLASS (mode) == MODE_FLOAT
9444 /* The target function must not have a prototype. */
9445 && cum->nargs_prototype <= 0
9446 /* libcalls do not need to pass items in both FP and general
9448 && type != NULL_TREE
9449 /* All this hair applies to "outgoing" args only. This includes
9450 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9452 /* Also pass outgoing floating arguments in both registers in indirect
9453 calls with the 32 bit ABI and the HP assembler since there is no
9454 way to the specify argument locations in static functions. */
9459 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9465 gen_rtx_EXPR_LIST (VOIDmode,
9466 gen_rtx_REG (mode, fpr_reg_base),
9468 gen_rtx_EXPR_LIST (VOIDmode,
9469 gen_rtx_REG (mode, gpr_reg_base),
9474 /* See if we should pass this parameter in a general register. */
9475 if (TARGET_SOFT_FLOAT
9476 /* Indirect calls in the normal 32bit ABI require all arguments
9477 to be passed in general registers. */
9478 || (!TARGET_PORTABLE_RUNTIME
9482 /* If the parameter is not a scalar floating-point parameter,
9483 then it belongs in GPRs. */
9484 || GET_MODE_CLASS (mode) != MODE_FLOAT
9485 /* Structure with single SFmode field belongs in GPR. */
9486 || (type && AGGREGATE_TYPE_P (type)))
9487 retval = gen_rtx_REG (mode, gpr_reg_base);
9489 retval = gen_rtx_REG (mode, fpr_reg_base);
9495 /* If this arg would be passed totally in registers or totally on the stack,
9496 then this routine should return zero. */
9499 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9500 tree type, bool named ATTRIBUTE_UNUSED)
9502 unsigned int max_arg_words = 8;
9503 unsigned int offset = 0;
9508 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9511 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9512 /* Arg fits fully into registers. */
9514 else if (cum->words + offset >= max_arg_words)
9515 /* Arg fully on the stack. */
9519 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9523 /* A get_unnamed_section callback for switching to the text section.
9525 This function is only used with SOM. Because we don't support
9526 named subspaces, we can only create a new subspace or switch back
9527 to the default text subspace. */
9530 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9532 gcc_assert (TARGET_SOM);
9535 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9537 /* We only want to emit a .nsubspa directive once at the
9538 start of the function. */
9539 cfun->machine->in_nsubspa = 1;
9541 /* Create a new subspace for the text. This provides
9542 better stub placement and one-only functions. */
9544 && DECL_ONE_ONLY (cfun->decl)
9545 && !DECL_WEAK (cfun->decl))
9547 output_section_asm_op ("\t.SPACE $TEXT$\n"
9548 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9549 "ACCESS=44,SORT=24,COMDAT");
9555 /* There isn't a current function or the body of the current
9556 function has been completed. So, we are changing to the
9557 text section to output debugging information. Thus, we
9558 need to forget that we are in the text section so that
9559 varasm.c will call us when text_section is selected again. */
9560 gcc_assert (!cfun || !cfun->machine
9561 || cfun->machine->in_nsubspa == 2);
9564 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9567 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9570 /* A get_unnamed_section callback for switching to comdat data
9571 sections. This function is only used with SOM. */
9574 som_output_comdat_data_section_asm_op (const void *data)
9577 output_section_asm_op (data);
9580 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9583 pa_som_asm_init_sections (void)
9586 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9588 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9589 is not being generated. */
9590 som_readonly_data_section
9591 = get_unnamed_section (0, output_section_asm_op,
9592 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9594 /* When secondary definitions are not supported, SOM makes readonly
9595 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9597 som_one_only_readonly_data_section
9598 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9600 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9601 "ACCESS=0x2c,SORT=16,COMDAT");
9604 /* When secondary definitions are not supported, SOM makes data one-only
9605 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9606 som_one_only_data_section
9607 = get_unnamed_section (SECTION_WRITE,
9608 som_output_comdat_data_section_asm_op,
9609 "\t.SPACE $PRIVATE$\n"
9610 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9611 "ACCESS=31,SORT=24,COMDAT");
9613 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9614 which reference data within the $TEXT$ space (for example constant
9615 strings in the $LIT$ subspace).
9617 The assemblers (GAS and HP as) both have problems with handling
9618 the difference of two symbols which is the other correct way to
9619 reference constant data during PIC code generation.
9621 So, there's no way to reference constant data which is in the
9622 $TEXT$ space during PIC generation. Instead place all constant
9623 data into the $PRIVATE$ subspace (this reduces sharing, but it
9624 works correctly). */
9625 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9627 /* We must not have a reference to an external symbol defined in a
9628 shared library in a readonly section, else the SOM linker will
9631 So, we force exception information into the data section. */
9632 exception_section = data_section;
9635 /* On hpux10, the linker will give an error if we have a reference
9636 in the read-only data section to a symbol defined in a shared
9637 library. Therefore, expressions that might require a reloc can
9638 not be placed in the read-only data section. */
9641 pa_select_section (tree exp, int reloc,
9642 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9644 if (TREE_CODE (exp) == VAR_DECL
9645 && TREE_READONLY (exp)
9646 && !TREE_THIS_VOLATILE (exp)
9647 && DECL_INITIAL (exp)
9648 && (DECL_INITIAL (exp) == error_mark_node
9649 || TREE_CONSTANT (DECL_INITIAL (exp)))
9653 && DECL_ONE_ONLY (exp)
9654 && !DECL_WEAK (exp))
9655 return som_one_only_readonly_data_section;
9657 return readonly_data_section;
9659 else if (CONSTANT_CLASS_P (exp) && !reloc)
9660 return readonly_data_section;
9662 && TREE_CODE (exp) == VAR_DECL
9663 && DECL_ONE_ONLY (exp)
9664 && !DECL_WEAK (exp))
9665 return som_one_only_data_section;
9667 return data_section;
9671 pa_globalize_label (FILE *stream, const char *name)
9673 /* We only handle DATA objects here, functions are globalized in
9674 ASM_DECLARE_FUNCTION_NAME. */
9675 if (! FUNCTION_NAME_P (name))
9677 fputs ("\t.EXPORT ", stream);
9678 assemble_name (stream, name);
9679 fputs (",DATA\n", stream);
9683 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9686 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9687 int incoming ATTRIBUTE_UNUSED)
9689 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9692 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9695 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9697 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9698 PA64 ABI says that objects larger than 128 bits are returned in memory.
9699 Note, int_size_in_bytes can return -1 if the size of the object is
9700 variable or larger than the maximum value that can be expressed as
9701 a HOST_WIDE_INT. It can also return zero for an empty type. The
9702 simplest way to handle variable and empty types is to pass them in
9703 memory. This avoids problems in defining the boundaries of argument
9704 slots, allocating registers, etc. */
9705 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9706 || int_size_in_bytes (type) <= 0);
9709 /* Structure to hold declaration and name of external symbols that are
9710 emitted by GCC. We generate a vector of these symbols and output them
9711 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9712 This avoids putting out names that are never really used. */
9714 typedef struct GTY(()) extern_symbol
9720 /* Define gc'd vector type for extern_symbol. */
9721 DEF_VEC_O(extern_symbol);
9722 DEF_VEC_ALLOC_O(extern_symbol,gc);
9724 /* Vector of extern_symbol pointers. */
9725 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9727 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9728 /* Mark DECL (name NAME) as an external reference (assembler output
9729 file FILE). This saves the names to output at the end of the file
9730 if actually referenced. */
9733 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9735 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9737 gcc_assert (file == asm_out_file);
9742 /* Output text required at the end of an assembler file.
9743 This includes deferred plabels and .import directives for
9744 all external symbols that were actually referenced. */
9747 pa_hpux_file_end (void)
9752 if (!NO_DEFERRED_PROFILE_COUNTERS)
9753 output_deferred_profile_counters ();
9755 output_deferred_plabels ();
9757 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9759 tree decl = p->decl;
9761 if (!TREE_ASM_WRITTEN (decl)
9762 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9763 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9766 VEC_free (extern_symbol, gc, extern_symbols);
9770 /* Return true if a change from mode FROM to mode TO for a register
9771 in register class RCLASS is invalid. */
9774 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9775 enum reg_class rclass)
9780 /* Reject changes to/from complex and vector modes. */
9781 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9782 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9785 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9788 /* There is no way to load QImode or HImode values directly from
9789 memory. SImode loads to the FP registers are not zero extended.
9790 On the 64-bit target, this conflicts with the definition of
9791 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9792 with different sizes in the floating-point registers. */
9793 if (MAYBE_FP_REG_CLASS_P (rclass))
9796 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9797 in specific sets of registers. Thus, we cannot allow changing
9798 to a larger mode when it's larger than a word. */
9799 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9800 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9806 /* Returns TRUE if it is a good idea to tie two pseudo registers
9807 when one has mode MODE1 and one has mode MODE2.
9808 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9809 for any hard reg, then this must be FALSE for correct output.
9811 We should return FALSE for QImode and HImode because these modes
9812 are not ok in the floating-point registers. However, this prevents
9813 tieing these modes to SImode and DImode in the general registers.
9814 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9815 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9816 in the floating-point registers. */
9819 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9821 /* Don't tie modes in different classes. */
9822 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9829 /* Length in units of the trampoline instruction code. */
9831 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9834 /* Output assembler code for a block containing the constant parts
9835 of a trampoline, leaving space for the variable parts.\
9837 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9838 and then branches to the specified routine.
9840 This code template is copied from text segment to stack location
9841 and then patched with pa_trampoline_init to contain valid values,
9842 and then entered as a subroutine.
9844 It is best to keep this as small as possible to avoid having to
9845 flush multiple lines in the cache. */
9848 pa_asm_trampoline_template (FILE *f)
9852 fputs ("\tldw 36(%r22),%r21\n", f);
9853 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9854 if (ASSEMBLER_DIALECT == 0)
9855 fputs ("\tdepi 0,31,2,%r21\n", f);
9857 fputs ("\tdepwi 0,31,2,%r21\n", f);
9858 fputs ("\tldw 4(%r21),%r19\n", f);
9859 fputs ("\tldw 0(%r21),%r21\n", f);
9862 fputs ("\tbve (%r21)\n", f);
9863 fputs ("\tldw 40(%r22),%r29\n", f);
9864 fputs ("\t.word 0\n", f);
9865 fputs ("\t.word 0\n", f);
9869 fputs ("\tldsid (%r21),%r1\n", f);
9870 fputs ("\tmtsp %r1,%sr0\n", f);
9871 fputs ("\tbe 0(%sr0,%r21)\n", f);
9872 fputs ("\tldw 40(%r22),%r29\n", f);
9874 fputs ("\t.word 0\n", f);
9875 fputs ("\t.word 0\n", f);
9876 fputs ("\t.word 0\n", f);
9877 fputs ("\t.word 0\n", f);
9881 fputs ("\t.dword 0\n", f);
9882 fputs ("\t.dword 0\n", f);
9883 fputs ("\t.dword 0\n", f);
9884 fputs ("\t.dword 0\n", f);
9885 fputs ("\tmfia %r31\n", f);
9886 fputs ("\tldd 24(%r31),%r1\n", f);
9887 fputs ("\tldd 24(%r1),%r27\n", f);
9888 fputs ("\tldd 16(%r1),%r1\n", f);
9889 fputs ("\tbve (%r1)\n", f);
9890 fputs ("\tldd 32(%r31),%r31\n", f);
9891 fputs ("\t.dword 0 ; fptr\n", f);
9892 fputs ("\t.dword 0 ; static link\n", f);
9896 /* Emit RTL insns to initialize the variable parts of a trampoline.
9897 FNADDR is an RTX for the address of the function's pure code.
9898 CXT is an RTX for the static chain value for the function.
9900 Move the function address to the trampoline template at offset 36.
9901 Move the static chain value to trampoline template at offset 40.
9902 Move the trampoline address to trampoline template at offset 44.
9903 Move r19 to trampoline template at offset 48. The latter two
9904 words create a plabel for the indirect call to the trampoline.
9906 A similar sequence is used for the 64-bit port but the plabel is
9907 at the beginning of the trampoline.
9909 Finally, the cache entries for the trampoline code are flushed.
9910 This is necessary to ensure that the trampoline instruction sequence
9911 is written to memory prior to any attempts at prefetching the code
9915 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9917 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9918 rtx start_addr = gen_reg_rtx (Pmode);
9919 rtx end_addr = gen_reg_rtx (Pmode);
9920 rtx line_length = gen_reg_rtx (Pmode);
9923 emit_block_move (m_tramp, assemble_trampoline_template (),
9924 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
9925 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
9929 tmp = adjust_address (m_tramp, Pmode, 36);
9930 emit_move_insn (tmp, fnaddr);
9931 tmp = adjust_address (m_tramp, Pmode, 40);
9932 emit_move_insn (tmp, chain_value);
9934 /* Create a fat pointer for the trampoline. */
9935 tmp = adjust_address (m_tramp, Pmode, 44);
9936 emit_move_insn (tmp, r_tramp);
9937 tmp = adjust_address (m_tramp, Pmode, 48);
9938 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
9940 /* fdc and fic only use registers for the address to flush,
9941 they do not accept integer displacements. We align the
9942 start and end addresses to the beginning of their respective
9943 cache lines to minimize the number of lines flushed. */
9944 emit_insn (gen_andsi3 (start_addr, r_tramp,
9945 GEN_INT (-MIN_CACHELINE_SIZE)));
9946 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
9947 emit_insn (gen_andsi3 (end_addr, tmp,
9948 GEN_INT (-MIN_CACHELINE_SIZE)));
9949 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
9950 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
9951 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
9952 gen_reg_rtx (Pmode),
9953 gen_reg_rtx (Pmode)));
9957 tmp = adjust_address (m_tramp, Pmode, 56);
9958 emit_move_insn (tmp, fnaddr);
9959 tmp = adjust_address (m_tramp, Pmode, 64);
9960 emit_move_insn (tmp, chain_value);
9962 /* Create a fat pointer for the trampoline. */
9963 tmp = adjust_address (m_tramp, Pmode, 16);
9964 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
9965 tmp = adjust_address (m_tramp, Pmode, 24);
9966 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
9968 /* fdc and fic only use registers for the address to flush,
9969 they do not accept integer displacements. We align the
9970 start and end addresses to the beginning of their respective
9971 cache lines to minimize the number of lines flushed. */
9972 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
9973 emit_insn (gen_anddi3 (start_addr, tmp,
9974 GEN_INT (-MIN_CACHELINE_SIZE)));
9975 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
9976 emit_insn (gen_anddi3 (end_addr, tmp,
9977 GEN_INT (-MIN_CACHELINE_SIZE)));
9978 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
9979 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
9980 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
9981 gen_reg_rtx (Pmode),
9982 gen_reg_rtx (Pmode)));
9986 /* Perform any machine-specific adjustment in the address of the trampoline.
9987 ADDR contains the address that was passed to pa_trampoline_init.
9988 Adjust the trampoline address to point to the plabel at offset 44. */
9991 pa_trampoline_adjust_address (rtx addr)
9994 addr = memory_address (Pmode, plus_constant (addr, 46));