1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
/* NOTE(review): this numbered listing omits several original lines here
   (return type, braces, the early-return bodies and the declaration of
   `set`); the fragment is not compilable as shown.  */
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
/* Bail out unless both insns are recognizable and IN_INSN really is an
   fp store (plain TYPE_FPSTORE or the combined TYPE_FPSTORE_LOAD).  */
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
/* Mode of the value IN_INSN stores.  */
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69 set = single_set (out_insn);
73 other_mode = GET_MODE (SET_SRC (set));
/* The bypass applies only when the produced and stored values have the
   same size.  */
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
/* DO_FRAME_NOTES: 1 when dwarf2 frame notes should be emitted, which is
   only useful when INCOMING_RETURN_ADDR_RTX is defined.  NOTE(review):
   the #else/#endif lines between the two definitions are omitted from
   this listing.  */
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
83 #define DO_FRAME_NOTES 0
/* Forward declarations for the static helpers defined later in this
   file.  NOTE(review): a few declarations are visibly truncated in this
   listing (e.g. lines 113, 118, 153, 155 end mid-prototype).  */
87 static void copy_reg_pointer (rtx, rtx);
88 static void fix_range (const char *);
89 static bool pa_handle_option (size_t, const char *, int);
90 static int hppa_address_cost (rtx, bool);
91 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
92 static inline rtx force_mode (enum machine_mode, rtx);
93 static void pa_reorg (void);
94 static void pa_combine_instructions (void);
95 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
96 static bool forward_branch_p (rtx);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
/* Prologue/epilogue register save/restore helpers.  */
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
107 static void update_total_code_bytes (unsigned int);
108 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
/* Scheduler hooks.  */
109 static int pa_adjust_cost (rtx, rtx, rtx, int);
110 static int pa_adjust_priority (rtx, int);
111 static int pa_issue_rate (void);
112 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
113 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
115 static void pa_encode_section_info (tree, rtx, int);
116 static const char *pa_strip_name_encoding (const char *);
117 static bool pa_function_ok_for_sibcall (tree, tree);
118 static void pa_globalize_label (FILE *, const char *)
119 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
121 HOST_WIDE_INT, tree);
122 #if !defined(USE_COLLECT2)
123 static void pa_asm_out_constructor (rtx, int);
124 static void pa_asm_out_destructor (rtx, int);
126 static void pa_init_builtins (void);
/* varargs / va_arg support.  */
127 static rtx hppa_builtin_saveregs (void);
128 static void hppa_va_start (tree, rtx);
129 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
130 static bool pa_scalar_mode_supported_p (enum machine_mode);
131 static bool pa_commutative_p (const_rtx x, int outer_code);
132 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
133 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
134 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
/* Per-OS/assembler file-start emitters.  */
135 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
136 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
137 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
138 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
139 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
140 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
142 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
143 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
144 static void output_deferred_plabels (void);
145 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
146 #ifdef ASM_OUTPUT_EXTERNAL_REAL
147 static void pa_hpux_file_end (void);
149 #ifdef HPUX_LONG_DOUBLE_LIBRARY
150 static void pa_hpux_init_libfuncs (void);
/* Argument passing and secondary reload hooks.  */
152 static rtx pa_struct_value_rtx (tree, int);
153 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
155 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
157 static struct machine_function * pa_init_machine_status (void);
158 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
160 secondary_reload_info *);
161 static void pa_extra_live_on_entry (bitmap);
163 /* The following extra sections are only used for SOM. */
164 static GTY(()) section *som_readonly_data_section;
165 static GTY(()) section *som_one_only_readonly_data_section;
166 static GTY(()) section *som_one_only_data_section;
168 /* Which cpu we are scheduling for. */
169 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
171 /* The UNIX standard to use for predefines and linking. */
/* Encoded as the year of the standard: 1998 (UNIX 98), 1995 (UNIX 95)
   or 1993 (UNIX 93), chosen from the HP-UX target version.  */
172 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
174 /* Counts for the number of callee-saved general and floating point
175 registers which were saved by the current function's prologue. */
176 static int gr_saved, fr_saved;
178 /* Boolean indicating whether the return pointer was saved by the
179 current function's prologue. */
180 static bool rp_saved;
182 static rtx find_addr_reg (rtx);
184 /* Keep track of the number of bytes we have output in the CODE subspace
185 during this compilation so we'll know when to emit inline long-calls. */
186 unsigned long total_code_bytes;
188 /* The last address of the previous function plus the number of bytes in
189 associated thunks that have been output. This is used to determine if
190 a thunk can use an IA-relative branch to reach its target function. */
191 static unsigned int last_address;
193 /* Variables to handle plabels that we discover are necessary at assembly
194 output time. They are output after the current function. */
/* NOTE(review): the members of struct deferred_plabel and the name of
   the GTY'd array it describes are omitted from this listing.  */
195 struct GTY(()) deferred_plabel
200 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
202 static size_t n_deferred_plabels = 0;
205 /* Initialize the GCC target structure. */
/* Assembler pseudo-ops for emitting aligned integers; the unaligned
   variants reuse the aligned ops.  */
207 #undef TARGET_ASM_ALIGNED_HI_OP
208 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
209 #undef TARGET_ASM_ALIGNED_SI_OP
210 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
211 #undef TARGET_ASM_ALIGNED_DI_OP
212 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
213 #undef TARGET_ASM_UNALIGNED_HI_OP
214 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
215 #undef TARGET_ASM_UNALIGNED_SI_OP
216 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
217 #undef TARGET_ASM_UNALIGNED_DI_OP
218 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
219 #undef TARGET_ASM_INTEGER
220 #define TARGET_ASM_INTEGER pa_assemble_integer
222 #undef TARGET_ASM_FUNCTION_PROLOGUE
223 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
224 #undef TARGET_ASM_FUNCTION_EPILOGUE
225 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
227 #undef TARGET_LEGITIMIZE_ADDRESS
228 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
230 #undef TARGET_SCHED_ADJUST_COST
231 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
232 #undef TARGET_SCHED_ADJUST_PRIORITY
233 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
234 #undef TARGET_SCHED_ISSUE_RATE
235 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
237 #undef TARGET_ENCODE_SECTION_INFO
238 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
239 #undef TARGET_STRIP_NAME_ENCODING
240 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
242 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
243 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
245 #undef TARGET_COMMUTATIVE_P
246 #define TARGET_COMMUTATIVE_P pa_commutative_p
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
250 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
251 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
/* File-end hook: the HP-UX variant also emits deferred externals.
   NOTE(review): the #else/#endif lines of this conditional are omitted
   from this listing.  */
253 #undef TARGET_ASM_FILE_END
254 #ifdef ASM_OUTPUT_EXTERNAL_REAL
255 #define TARGET_ASM_FILE_END pa_hpux_file_end
257 #define TARGET_ASM_FILE_END output_deferred_plabels
260 #if !defined(USE_COLLECT2)
261 #undef TARGET_ASM_CONSTRUCTOR
262 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
263 #undef TARGET_ASM_DESTRUCTOR
264 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
267 #undef TARGET_DEFAULT_TARGET_FLAGS
268 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
269 #undef TARGET_HANDLE_OPTION
270 #define TARGET_HANDLE_OPTION pa_handle_option
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS pa_init_builtins
275 #undef TARGET_RTX_COSTS
276 #define TARGET_RTX_COSTS hppa_rtx_costs
277 #undef TARGET_ADDRESS_COST
278 #define TARGET_ADDRESS_COST hppa_address_cost
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
283 #ifdef HPUX_LONG_DOUBLE_LIBRARY
284 #undef TARGET_INIT_LIBFUNCS
285 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
288 #undef TARGET_PROMOTE_FUNCTION_RETURN
289 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
290 #undef TARGET_PROMOTE_PROTOTYPES
291 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
295 #undef TARGET_RETURN_IN_MEMORY
296 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
297 #undef TARGET_MUST_PASS_IN_STACK
298 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
299 #undef TARGET_PASS_BY_REFERENCE
300 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
301 #undef TARGET_CALLEE_COPIES
302 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
303 #undef TARGET_ARG_PARTIAL_BYTES
304 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
306 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
307 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
308 #undef TARGET_EXPAND_BUILTIN_VA_START
309 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
310 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
311 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
313 #undef TARGET_SCALAR_MODE_SUPPORTED_P
314 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
316 #undef TARGET_CANNOT_FORCE_CONST_MEM
317 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
319 #undef TARGET_SECONDARY_RELOAD
320 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
322 #undef TARGET_EXTRA_LIVE_ON_ENTRY
323 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
/* Instantiate the target hook vector from the macros above.  */
325 struct gcc_target targetm = TARGET_INITIALIZER;
327 /* Parse the -mfixed-range= option string. */
/* NOTE(review): this listing omits many original lines of this function
   (braces, the loop over comma-separated ranges, the early returns
   after each warning, and the '-'/','-to-NUL rewrites); the fragment is
   not compilable as shown.  */
330 fix_range (const char *const_str)
333 char *str, *dash, *comma;
335 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
336 REG2 are either register names or register numbers. The effect
337 of this option is to mark the registers in the range from REG1 to
338 REG2 as ``fixed'' so they won't be used by the compiler. This is
339 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
/* Work on a writable copy of the option string.  */
341 i = strlen (const_str);
342 str = (char *) alloca (i + 1);
343 memcpy (str, const_str, i + 1);
347 dash = strchr (str, '-');
350 warning (0, "value of -mfixed-range must have form REG1-REG2");
355 comma = strchr (dash + 1, ',');
/* Decode both endpoints; decode_reg_name presumably returns a negative
   value for an unknown name — TODO confirm against its definition.  */
359 first = decode_reg_name (str);
362 warning (0, "unknown register name: %s", str);
366 last = decode_reg_name (dash + 1);
369 warning (0, "unknown register name: %s", dash + 1);
377 warning (0, "%s-%s is an empty range", str, dash + 1);
/* Mark every register in [first, last] fixed and call-used.  */
381 for (i = first; i <= last; ++i)
382 fixed_regs[i] = call_used_regs[i] = 1;
391 /* Check if all floating point registers have been fixed. */
392 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
/* If so, disable FP register use entirely.  */
397 target_flags |= MASK_DISABLE_FPREGS;
400 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the switch statement, return statements, braces and the
   OPT_mschedule/OPT_munix case labels are omitted from this listing.  */
403 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
/* Architecture-level options are encoded in MASK_PA_11/MASK_PA_20:
   1.0 = neither bit, 1.1 = PA_11 only, 2.0 = both.  */
408 case OPT_mpa_risc_1_0:
410 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
414 case OPT_mpa_risc_1_1:
416 target_flags &= ~MASK_PA_20;
417 target_flags |= MASK_PA_11;
420 case OPT_mpa_risc_2_0:
422 target_flags |= MASK_PA_11 | MASK_PA_20;
/* Map the -mschedule= argument to a processor enumerator.  */
426 if (strcmp (arg, "8000") == 0)
427 pa_cpu = PROCESSOR_8000;
428 else if (strcmp (arg, "7100") == 0)
429 pa_cpu = PROCESSOR_7100;
430 else if (strcmp (arg, "700") == 0)
431 pa_cpu = PROCESSOR_700;
432 else if (strcmp (arg, "7100LC") == 0)
433 pa_cpu = PROCESSOR_7100LC;
434 else if (strcmp (arg, "7200") == 0)
435 pa_cpu = PROCESSOR_7200;
436 else if (strcmp (arg, "7300") == 0)
437 pa_cpu = PROCESSOR_7300;
442 case OPT_mfixed_range_:
/* Presumably part of -munix= handling, gated on HP-UX version — the
   surrounding code is omitted from this listing.  */
452 #if TARGET_HPUX_10_10
458 #if TARGET_HPUX_11_11
/* Validate and reconcile the final set of target options.
   NOTE(review): the return type, braces and some statement bodies are
   omitted from this listing.  */
470 override_options (void)
472 /* Unconditional branches in the delay slot are not compatible with dwarf2
473 call frame information. There is no benefit in using this optimization
474 on PA8000 and later processors. */
475 if (pa_cpu >= PROCESSOR_8000
476 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
477 || flag_unwind_tables)
478 target_flags &= ~MASK_JUMP_IN_DELAY;
/* Diagnose incompatible PIC combinations.  */
480 if (flag_pic && TARGET_PORTABLE_RUNTIME)
482 warning (0, "PIC code generation is not supported in the portable runtime model");
485 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
487 warning (0, "PIC code generation is not compatible with fast indirect calls");
/* Debug info requires GAS on this target; warn and drop -g otherwise.  */
490 if (! TARGET_GAS && write_symbols != NO_DEBUG)
492 warning (0, "-g is only supported when using GAS on this processor,");
493 warning (0, "-g option disabled");
494 write_symbols = NO_DEBUG;
497 /* We only support the "big PIC" model now. And we always generate PIC
498 code when in 64bit mode. */
499 if (flag_pic == 1 || TARGET_64BIT)
502 /* We can't guarantee that .dword is available for 32-bit targets. */
503 if (UNITS_PER_WORD == 4)
504 targetm.asm_out.aligned_op.di = NULL;
506 /* The unaligned ops are only available when using GAS. */
509 targetm.asm_out.unaligned_op.hi = NULL;
510 targetm.asm_out.unaligned_op.si = NULL;
511 targetm.asm_out.unaligned_op.di = NULL;
/* Install the per-function machine_function allocator.  */
514 init_machine_status = pa_init_machine_status;
/* Implement TARGET_INIT_BUILTINS: adjust builtin decls for this target.
   NOTE(review): the return type, braces and the #endif of the
   conditional below are omitted from this listing.  */
518 pa_init_builtins (void)
520 #ifdef DONT_HAVE_FPUTC_UNLOCKED
/* No fputc_unlocked on this OS; alias it to putc_unlocked.  */
521 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
522 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
523 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
524 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
/* HP-UX spells isfinite as _Isfinite/_Isfinitef.  */
527 if (built_in_decls [BUILT_IN_FINITE])
528 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
529 if (built_in_decls [BUILT_IN_FINITEF])
530 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
534 /* Function to init struct machine_function.
535 This will be called, via a pointer variable,
536 from push_function_context. */
538 static struct machine_function *
539 pa_init_machine_status (void)
/* Allocate a zeroed machine_function in GC-managed memory.  */
541 return GGC_CNEW (machine_function);
544 /* If FROM is a probable pointer register, mark TO as a probable
545 pointer register with the same pointer alignment as FROM. */
548 copy_reg_pointer (rtx to, rtx from)
550 if (REG_POINTER (from))
551 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
554 /* Return 1 if X contains a symbolic expression. We know these
555 expressions will have one of a few well defined forms, so
556 we need only check those forms. */
/* NOTE(review): the return type, braces and the body of the HIGH strip
   are omitted from this listing.  */
558 symbolic_expression_p (rtx x)
561 /* Strip off any HIGH. */
562 if (GET_CODE (x) == HIGH)
/* Delegate the real check to the symbolic_operand predicate.  */
565 return (symbolic_operand (x, VOIDmode));
568 /* Accept any constant that can be moved in one instruction into a
general register.  NOTE(review): the rest of this comment, the return
type and braces are omitted from this listing.  */
571 cint_ok_for_move (HOST_WIDE_INT ival)
573 /* OK if ldo, ldil, or zdepi, can be used. */
574 return (VAL_14_BITS_P (ival)
575 || ldil_cint_p (ival)
576 || zdepi_cint_p (ival));
579 /* Return truth value of whether OP can be used as an operand in a
adddi3 insn: a register, or an immediate that fits the add's field
(14 bits on 64-bit, 11 bits on 32-bit).  NOTE(review): the return
type and braces are omitted from this listing.  */
582 adddi3_operand (rtx op, enum machine_mode mode)
584 return (register_operand (op, mode)
585 || (GET_CODE (op) == CONST_INT
586 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
589 /* True iff the operand OP can be used as the destination operand of
590 an integer store. This also implies the operand could be used as
591 the source operand of an integer load. Symbolic, lo_sum and indexed
592 memory operands are not allowed. We accept reloading pseudos and
593 other memory operands. */
/* NOTE(review): the return type, braces and the REG check opening the
   first alternative are omitted from this listing.  */
595 integer_store_memory_operand (rtx op, enum machine_mode mode)
/* First alternative: a pseudo being reloaded that has no hard reg.  */
597 return ((reload_in_progress
599 && REGNO (op) >= FIRST_PSEUDO_REGISTER
600 && reg_renumber [REGNO (op)] < 0)
/* Second alternative: a MEM with a valid, non-symbolic, non-lo_sum-DLT,
   non-indexed address.  */
601 || (GET_CODE (op) == MEM
602 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
603 && !symbolic_memory_operand (op, VOIDmode)
604 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
605 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
608 /* True iff ldil can be used to load this CONST_INT. The least
609 significant 11 bits of the value must be zero and the value must
610 not change sign when extended from 32 to 64 bits. */
612 ldil_cint_p (HOST_WIDE_INT ival)
/* Mask keeps the low 11 bits and everything from bit 31 up; the result
   must be all-zeros or all-ones-from-bit-31 for ldil to work.
   NOTE(review): `(HOST_WIDE_INT) -1 << 31` left-shifts a negative value,
   which is formally undefined in C — relies on GCC's own semantics.  */
614 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
616 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
619 /* True iff zdepi can be used to generate this CONST_INT.
620 zdepi first sign extends a 5-bit signed number to a given field
621 length, then places this field anywhere in a zero. */
/* NOTE(review): the return type, braces and the line initializing
   lsb_mask are omitted from this listing.  */
623 zdepi_cint_p (unsigned HOST_WIDE_INT x)
625 unsigned HOST_WIDE_INT lsb_mask, t;
627 /* This might not be obvious, but it's at least fast.
628 This function is critical; we don't have the time loops would take. */
630 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
631 /* Return true iff t is a power of two. */
632 return ((t & (t - 1)) == 0);
635 /* True iff depi or extru can be used to compute (reg & mask).
636 Accept bit pattern like these:
NOTE(review): the example bit patterns, return type and braces are
omitted from this listing.  */
641 and_mask_p (unsigned HOST_WIDE_INT mask)
/* Adding the lowest set bit turns a contiguous run of ones into a
   single bit; the mask is acceptable iff the result is 0 or a power
   of two.  */
644 mask += mask & -mask;
645 return (mask & (mask - 1)) == 0;
648 /* True iff depi can be used to compute (reg | MASK). */
650 ior_mask_p (unsigned HOST_WIDE_INT mask)
/* Same contiguous-ones test as and_mask_p: add the lowest set bit and
   require the result to be 0 or a power of two.  */
652 mask += mask & -mask;
653 return (mask & (mask - 1)) == 0;
656 /* Legitimize PIC addresses. If the address is already
657 position-independent, we return ORIG. Newly generated
658 position-independent addresses go to REG. If we need more
659 than one register, we lose. */
/* NOTE(review): this listing omits many original lines of this function
   (return type, braces, local declarations such as `insn`, `pic_ref`,
   `tmp_reg` and `base`, several returns, and the UNSPEC operand
   vectors); the fragment is not compilable as shown.  */
662 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
/* TLS symbols must have been handled before reaching here.  */
666 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
668 /* Labels need special handling. */
669 if (pic_label_operand (orig, mode))
673 /* We do not want to go through the movXX expanders here since that
674 would create recursion.
676 Nor do we really want to call a generator for a named pattern
677 since that requires multiple patterns if we want to support
680 So instead we just emit the raw set, which avoids the movXX
681 expanders completely. */
682 mark_reg_pointer (reg, BITS_PER_UNIT);
683 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
685 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
686 add_reg_note (insn, REG_EQUAL, orig);
688 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
689 and update LABEL_NUSES because this is not done automatically. */
690 if (reload_in_progress || reload_completed)
692 /* Extract LABEL_REF. */
693 if (GET_CODE (orig) == CONST)
694 orig = XEXP (XEXP (orig, 0), 0);
695 /* Extract CODE_LABEL. */
696 orig = XEXP (orig, 0)
697 add_reg_note (insn, REG_LABEL_OPERAND, orig);
698 LABEL_NUSES (orig)++;
700 crtl->uses_pic_offset_table = 1;
703 if (GET_CODE (orig) == SYMBOL_REF)
709 /* Before reload, allocate a temporary register for the intermediate
710 result. This allows the sequence to be deleted when the final
711 result is unused and the insns are trivially dead. */
712 tmp_reg = ((reload_in_progress || reload_completed)
713 ? reg : gen_reg_rtx (Pmode))
715 if (function_label_operand (orig, mode))
717 /* Force function label into memory in word mode. */
718 orig = XEXP (force_const_mem (word_mode, orig), 0);
719 /* Load plabel address from DLT. */
720 emit_move_insn (tmp_reg,
721 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
722 gen_rtx_HIGH (word_mode, orig)));
724 = gen_const_mem (Pmode,
725 gen_rtx_LO_SUM (Pmode, tmp_reg,
726 gen_rtx_UNSPEC (Pmode,
729 emit_move_insn (reg, pic_ref);
730 /* Now load address of function descriptor. */
731 pic_ref = gen_rtx_MEM (Pmode, reg);
735 /* Load symbol reference from DLT. */
736 emit_move_insn (tmp_reg,
737 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
738 gen_rtx_HIGH (word_mode, orig)));
740 = gen_const_mem (Pmode,
741 gen_rtx_LO_SUM (Pmode, tmp_reg,
742 gen_rtx_UNSPEC (Pmode,
747 crtl->uses_pic_offset_table = 1;
748 mark_reg_pointer (reg, BITS_PER_UNIT);
749 insn = emit_move_insn (reg, pic_ref);
751 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
752 set_unique_reg_note (insn, REG_EQUAL, orig);
756 else if (GET_CODE (orig) == CONST)
/* (const (plus pic_offset_table_rtx X)) is already legitimate.  */
760 if (GET_CODE (XEXP (orig, 0)) == PLUS
761 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
765 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
/* Legitimize both halves of the PLUS recursively.  */
767 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
768 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
769 base == reg ? 0 : reg);
771 if (GET_CODE (orig) == CONST_INT)
773 if (INT_14_BITS (orig))
774 return plus_constant (base, INTVAL (orig));
775 orig = force_reg (Pmode, orig);
777 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
778 /* Likewise, should we set special REG_NOTEs here? */
/* Cached SYMBOL_REF for __tls_get_addr, created on first use.  */
784 static GTY(()) rtx gen_tls_tga;
/* Return the (lazily initialized) __tls_get_addr libfunc symbol.
   NOTE(review): the return type, braces, the NULL check and the return
   statement are omitted from this listing.  */
787 gen_tls_get_addr (void)
790 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
/* Call __tls_get_addr with ARG and return the register holding the
   result.  NOTE(review): the return type, braces and the final return
   are omitted from this listing.  */
795 hppa_tls_call (rtx arg)
799 ret = gen_reg_rtx (Pmode);
800 emit_library_call_value (gen_tls_get_addr (), ret,
801 LCT_CONST, Pmode, 1, arg, Pmode);
/* Expand ADDR, a TLS SYMBOL_REF, into an address computation appropriate
   for its TLS access model.  NOTE(review): this listing omits the return
   type, braces, the switch statement itself, the break/return lines, the
   flag_pic tests guarding the *_pic variants, and the default case.  */
807 legitimize_tls_address (rtx addr)
809 rtx ret, insn, tmp, t1, t2, tp;
810 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
/* Global dynamic: load the GOT entry for ADDR and call
   __tls_get_addr.  */
814 case TLS_MODEL_GLOBAL_DYNAMIC:
815 tmp = gen_reg_rtx (Pmode);
817 emit_insn (gen_tgd_load_pic (tmp, addr));
819 emit_insn (gen_tgd_load (tmp, addr));
820 ret = hppa_tls_call (tmp);
/* Local dynamic: one __tls_get_addr call for the module base, then add
   the symbol's offset within the module.  */
823 case TLS_MODEL_LOCAL_DYNAMIC:
824 ret = gen_reg_rtx (Pmode);
825 tmp = gen_reg_rtx (Pmode);
828 emit_insn (gen_tld_load_pic (tmp, addr));
830 emit_insn (gen_tld_load (tmp, addr));
831 t1 = hppa_tls_call (tmp);
834 t2 = gen_reg_rtx (Pmode);
835 emit_libcall_block (insn, t2, t1,
836 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
838 emit_insn (gen_tld_offset_load (ret, addr, t2));
/* Initial exec: thread pointer plus an offset loaded from memory.  */
841 case TLS_MODEL_INITIAL_EXEC:
842 tp = gen_reg_rtx (Pmode);
843 tmp = gen_reg_rtx (Pmode);
844 ret = gen_reg_rtx (Pmode);
845 emit_insn (gen_tp_load (tp));
847 emit_insn (gen_tie_load_pic (tmp, addr));
849 emit_insn (gen_tie_load (tmp, addr));
850 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
/* Local exec: thread pointer plus a link-time-constant offset.  */
853 case TLS_MODEL_LOCAL_EXEC:
854 tp = gen_reg_rtx (Pmode);
855 ret = gen_reg_rtx (Pmode);
856 emit_insn (gen_tp_load (tp));
857 emit_insn (gen_tle_load (ret, addr, tp));
867 /* Try machine-dependent ways of modifying an illegitimate address
868 to be legitimate. If we find one, return the new, valid address.
869 This macro is used in only one place: `memory_address' in explow.c.
871 OLDX is the address as it was before break_out_memory_refs was called.
872 In some cases it is useful to look at this to decide what needs to be done.
874 It is always safe for this macro to do nothing. It exists to recognize
875 opportunities to optimize the output.
877 For the PA, transform:
879 memory(X + <large int>)
883 if (<large int> & mask) >= 16
884 Y = (<large int> & ~mask) + mask + 1 Round up.
886 Y = (<large int> & ~mask) Round down.
888 memory (Z + (<large int> - Y));
890 This is for CSE to find several similar references, and only use one Z.
892 X can either be a SYMBOL_REF or REG, but because combine cannot
893 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
894 D will not fit in 14 bits.
896 MODE_FLOAT references allow displacements which fit in 5 bits, so use
899 MODE_INT references allow displacements which fit in 14 bits, so use
902 This relies on the fact that most mode MODE_FLOAT references will use FP
903 registers and most mode MODE_INT references will use integer registers.
904 (In the rare case of an FP register used in an integer MODE, we depend
905 on secondary reloads to clean things up.)
908 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
909 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
910 addressing modes to be used).
912 Put X and Z into registers. Then put the entire expression into
916 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
917 enum machine_mode mode)
921 /* We need to canonicalize the order of operands in unscaled indexed
922 addresses since the code that checks if an address is valid doesn't
923 always try both orders. */
924 if (!TARGET_NO_SPACE_REGS
925 && GET_CODE (x) == PLUS
926 && GET_MODE (x) == Pmode
927 && REG_P (XEXP (x, 0))
928 && REG_P (XEXP (x, 1))
929 && REG_POINTER (XEXP (x, 0))
930 && !REG_POINTER (XEXP (x, 1)))
931 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
933 if (PA_SYMBOL_REF_TLS_P (x))
934 return legitimize_tls_address (x);
936 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
938 /* Strip off CONST. */
939 if (GET_CODE (x) == CONST)
942 /* Special case. Get the SYMBOL_REF into a register and use indexing.
943 That should always be safe. */
944 if (GET_CODE (x) == PLUS
945 && GET_CODE (XEXP (x, 0)) == REG
946 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
948 rtx reg = force_reg (Pmode, XEXP (x, 1));
949 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
952 /* Note we must reject symbols which represent function addresses
953 since the assembler/linker can't handle arithmetic on plabels. */
954 if (GET_CODE (x) == PLUS
955 && GET_CODE (XEXP (x, 1)) == CONST_INT
956 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
957 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
958 || GET_CODE (XEXP (x, 0)) == REG))
960 rtx int_part, ptr_reg;
962 int offset = INTVAL (XEXP (x, 1));
965 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
966 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
968 /* Choose which way to round the offset. Round up if we
969 are >= halfway to the next boundary. */
970 if ((offset & mask) >= ((mask + 1) / 2))
971 newoffset = (offset & ~ mask) + mask + 1;
973 newoffset = (offset & ~ mask);
975 /* If the newoffset will not fit in 14 bits (ldo), then
976 handling this would take 4 or 5 instructions (2 to load
977 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
978 add the new offset and the SYMBOL_REF.) Combine can
979 not handle 4->2 or 5->2 combinations, so do not create
981 if (! VAL_14_BITS_P (newoffset)
982 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
984 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
987 gen_rtx_HIGH (Pmode, const_part));
990 gen_rtx_LO_SUM (Pmode,
991 tmp_reg, const_part));
995 if (! VAL_14_BITS_P (newoffset))
996 int_part = force_reg (Pmode, GEN_INT (newoffset));
998 int_part = GEN_INT (newoffset);
1000 ptr_reg = force_reg (Pmode,
1001 gen_rtx_PLUS (Pmode,
1002 force_reg (Pmode, XEXP (x, 0)),
1005 return plus_constant (ptr_reg, offset - newoffset);
1008 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1010 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1011 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1012 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1013 && (OBJECT_P (XEXP (x, 1))
1014 || GET_CODE (XEXP (x, 1)) == SUBREG)
1015 && GET_CODE (XEXP (x, 1)) != CONST)
1017 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1021 if (GET_CODE (reg1) != REG)
1022 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1024 reg2 = XEXP (XEXP (x, 0), 0);
1025 if (GET_CODE (reg2) != REG)
1026 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1028 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1029 gen_rtx_MULT (Pmode,
1035 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1037 Only do so for floating point modes since this is more speculative
1038 and we lose if it's an integer store. */
1039 if (GET_CODE (x) == PLUS
1040 && GET_CODE (XEXP (x, 0)) == PLUS
1041 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1042 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1043 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1044 && (mode == SFmode || mode == DFmode))
1047 /* First, try and figure out what to use as a base register. */
1048 rtx reg1, reg2, base, idx, orig_base;
1050 reg1 = XEXP (XEXP (x, 0), 1);
1055 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1056 then emit_move_sequence will turn on REG_POINTER so we'll know
1057 it's a base register below. */
1058 if (GET_CODE (reg1) != REG)
1059 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1061 if (GET_CODE (reg2) != REG)
1062 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1064 /* Figure out what the base and index are. */
1066 if (GET_CODE (reg1) == REG
1067 && REG_POINTER (reg1))
1070 orig_base = XEXP (XEXP (x, 0), 1);
1071 idx = gen_rtx_PLUS (Pmode,
1072 gen_rtx_MULT (Pmode,
1073 XEXP (XEXP (XEXP (x, 0), 0), 0),
1074 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1077 else if (GET_CODE (reg2) == REG
1078 && REG_POINTER (reg2))
1081 orig_base = XEXP (x, 1);
1088 /* If the index adds a large constant, try to scale the
1089 constant so that it can be loaded with only one insn. */
1090 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1091 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1092 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1093 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1095 /* Divide the CONST_INT by the scale factor, then add it to A. */
1096 int val = INTVAL (XEXP (idx, 1));
1098 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1099 reg1 = XEXP (XEXP (idx, 0), 0);
1100 if (GET_CODE (reg1) != REG)
1101 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1103 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1105 /* We can now generate a simple scaled indexed address. */
1108 (Pmode, gen_rtx_PLUS (Pmode,
1109 gen_rtx_MULT (Pmode, reg1,
1110 XEXP (XEXP (idx, 0), 1)),
1114 /* If B + C is still a valid base register, then add them. */
1115 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1116 && INTVAL (XEXP (idx, 1)) <= 4096
1117 && INTVAL (XEXP (idx, 1)) >= -4096)
1119 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1122 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1124 reg2 = XEXP (XEXP (idx, 0), 0);
1125 if (GET_CODE (reg2) != CONST_INT)
1126 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1128 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1129 gen_rtx_MULT (Pmode,
1135 /* Get the index into a register, then add the base + index and
1136 return a register holding the result. */
1138 /* First get A into a register. */
1139 reg1 = XEXP (XEXP (idx, 0), 0);
1140 if (GET_CODE (reg1) != REG)
1141 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1143 /* And get B into a register. */
1144 reg2 = XEXP (idx, 1);
1145 if (GET_CODE (reg2) != REG)
1146 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1148 reg1 = force_reg (Pmode,
1149 gen_rtx_PLUS (Pmode,
1150 gen_rtx_MULT (Pmode, reg1,
1151 XEXP (XEXP (idx, 0), 1)),
1154 /* Add the result to our base register and return. */
1155 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1159 /* Uh-oh. We might have an address for x[n-100000]. This needs
1160 special handling to avoid creating an indexed memory address
1161 with x-100000 as the base.
1163 If the constant part is small enough, then it's still safe because
1164 there is a guard page at the beginning and end of the data segment.
1166 Scaled references are common enough that we want to try and rearrange the
1167 terms so that we can use indexing for these addresses too. Only
1168 do the optimization for floating point modes. */
1170 if (GET_CODE (x) == PLUS
1171 && symbolic_expression_p (XEXP (x, 1)))
1173 /* Ugly. We modify things here so that the address offset specified
1174 by the index expression is computed first, then added to x to form
1175 the entire address. */
1177 rtx regx1, regx2, regy1, regy2, y;
1179 /* Strip off any CONST. */
1181 if (GET_CODE (y) == CONST)
1184 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1186 /* See if this looks like
1187 (plus (mult (reg) (shadd_const))
1188 (const (plus (symbol_ref) (const_int))))
1190 Where const_int is small. In that case the const
1191 expression is a valid pointer for indexing.
1193 If const_int is big, but can be divided evenly by shadd_const
1194 and added to (reg). This allows more scaled indexed addresses. */
1195 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1196 && GET_CODE (XEXP (x, 0)) == MULT
1197 && GET_CODE (XEXP (y, 1)) == CONST_INT
1198 && INTVAL (XEXP (y, 1)) >= -4096
1199 && INTVAL (XEXP (y, 1)) <= 4095
1200 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1201 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1203 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1207 if (GET_CODE (reg1) != REG)
1208 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1210 reg2 = XEXP (XEXP (x, 0), 0);
1211 if (GET_CODE (reg2) != REG)
1212 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1214 return force_reg (Pmode,
1215 gen_rtx_PLUS (Pmode,
1216 gen_rtx_MULT (Pmode,
1221 else if ((mode == DFmode || mode == SFmode)
1222 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1223 && GET_CODE (XEXP (x, 0)) == MULT
1224 && GET_CODE (XEXP (y, 1)) == CONST_INT
1225 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1226 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1227 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1230 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1231 / INTVAL (XEXP (XEXP (x, 0), 1))));
1232 regx2 = XEXP (XEXP (x, 0), 0);
1233 if (GET_CODE (regx2) != REG)
1234 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1235 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1239 gen_rtx_PLUS (Pmode,
1240 gen_rtx_MULT (Pmode, regx2,
1241 XEXP (XEXP (x, 0), 1)),
1242 force_reg (Pmode, XEXP (y, 0))));
1244 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1245 && INTVAL (XEXP (y, 1)) >= -4096
1246 && INTVAL (XEXP (y, 1)) <= 4095)
1248 /* This is safe because of the guard page at the
1249 beginning and end of the data space. Just
1250 return the original address. */
1255 /* Doesn't look like one we can optimize. */
1256 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1257 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1258 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1259 regx1 = force_reg (Pmode,
1260 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1262 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1270 /* For the HPPA, REG and REG+CONST is cost 0
1271 and addresses involving symbolic constants are cost 2.
1273 PIC addresses are very expensive.
1275 It is no coincidence that this has the same structure
1276 as GO_IF_LEGITIMATE_ADDRESS. */
/* Cost of an address for the HPPA: per the block comment above, REG and
   REG+CONST addresses are cheap and symbolic/PIC addresses are expensive.
   NOTE(review): the embedded original line numbers are non-contiguous here;
   the switch cases and function close are elided from this view, so only
   the dispatch on GET_CODE (X) is visible.  */
1279 hppa_address_cost (rtx X,
1280 bool speed ATTRIBUTE_UNUSED)
1282 switch (GET_CODE (X))
1295 /* Compute a (partial) cost for rtx X. Return true if the complete
1296 cost has been computed, and false if subexpressions should be
1297 scanned. In either case, *TOTAL contains the cost result. */
/* Partial rtx cost hook (see comment above): fills *TOTAL with an insn-count
   cost for X.  NOTE(review): many lines (the switch statement itself, case
   labels, braces, return statements) are elided in this view — only selected
   cost assignments are visible.  The visible logic charges FP multiply 3
   insns, integer multiply 8 insns on PA 1.1 with FP regs enabled and 20
   otherwise, division ~14 (FP) / 60 (presumably integer — TODO confirm,
   the case label is elided), and PLUS/shift-add 1 insn (3 for FP).  */
1300 hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1301 bool speed ATTRIBUTE_UNUSED)
1306 if (INTVAL (x) == 0)
1308 else if (INT_14_BITS (x))
1325 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1326 && outer_code != SET)
1333 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1334 *total = COSTS_N_INSNS (3);
1335 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1336 *total = COSTS_N_INSNS (8);
1338 *total = COSTS_N_INSNS (20);
1342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1344 *total = COSTS_N_INSNS (14);
1352 *total = COSTS_N_INSNS (60);
1355 case PLUS: /* this includes shNadd insns */
1357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1358 *total = COSTS_N_INSNS (3);
1360 *total = COSTS_N_INSNS (1);
1366 *total = COSTS_N_INSNS (1);
1374 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1375 new rtx with the correct mode. */
/* Ensure ORIG (a hard REG, per the assert) has mode MODE; if it already
   does, the original is used, otherwise a fresh REG rtx with the same
   register number is built in MODE.  NOTE(review): the early-return body
   for the equal-mode case is elided in this view.  */
1377 force_mode (enum machine_mode mode, rtx orig)
1379 if (mode == GET_MODE (orig))
1382 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1384 return gen_rtx_REG (mode, REGNO (orig));
1387 /* Return 1 if *X is a thread-local symbol. */
/* for_each_rtx callback: returns nonzero when *X is a thread-local
   SYMBOL_REF (see pa_tls_referenced_p below).  DATA is unused.  */
1390 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1392 return PA_SYMBOL_REF_TLS_P (*x);
1395 /* Return 1 if X contains a thread-local symbol. */
/* Return nonzero if X contains a thread-local symbol anywhere in its
   subexpressions.  Short-circuits when the target has no TLS support
   (the return for that branch is elided in this view).  */
1398 pa_tls_referenced_p (rtx x)
1400 if (!TARGET_HAVE_TLS)
1403 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1406 /* Emit insns to move operands[1] into operands[0].
1408 Return 1 if we have written out everything that needs to be done to
1409 do the move. Otherwise, return 0 and the caller will emit the move
1412 Note SCRATCH_REG may not be in the proper mode depending on how it
1413 will be used. This routine is responsible for creating a new copy
1414 of SCRATCH_REG in the proper mode. */
/* Emit insns to move operands[1] into operands[0]; see the contract in the
   block comment immediately above.  The function is a long dispatch:
   (1) normalize indexed / reload-time operands, (2) handle secondary
   reloads for FP registers and SAR, (3) handle the common register and
   memory destinations, (4) split symbolic, PIC, TLS and hard constants.
   NOTE(review): the embedded original line numbers are non-contiguous —
   braces, else-arms and several declarations (e.g. the `tem', `insn'
   locals) are elided from this view, so the code below is structurally
   incomplete; code lines are left byte-identical.  */
1417 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1419 register rtx operand0 = operands[0];
1420 register rtx operand1 = operands[1];
1423 /* We can only handle indexed addresses in the destination operand
1424 of floating point stores. Thus, we need to break out indexed
1425 addresses from the destination operand. */
1426 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1428 gcc_assert (can_create_pseudo_p ());
1430 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1431 operand0 = replace_equiv_address (operand0, tem);
1434 /* On targets with non-equivalent space registers, break out unscaled
1435 indexed addresses from the source operand before the final CSE.
1436 We have to do this because the REG_POINTER flag is not correctly
1437 carried through various optimization passes and CSE may substitute
1438 a pseudo without the pointer set for one with the pointer set. As
1439 a result, we lose various opportunities to create insns with
1440 unscaled indexed addresses. */
1441 if (!TARGET_NO_SPACE_REGS
1442 && !cse_not_expected
1443 && GET_CODE (operand1) == MEM
1444 && GET_CODE (XEXP (operand1, 0)) == PLUS
1445 && REG_P (XEXP (XEXP (operand1, 0), 0))
1446 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1448 = replace_equiv_address (operand1,
1449 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
/* During reload, replace pseudos that did not get hard registers by
   their equivalent memory locations (reg_equiv_mem).  */
1452 && reload_in_progress && GET_CODE (operand0) == REG
1453 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1454 operand0 = reg_equiv_mem[REGNO (operand0)];
1455 else if (scratch_reg
1456 && reload_in_progress && GET_CODE (operand0) == SUBREG
1457 && GET_CODE (SUBREG_REG (operand0)) == REG
1458 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1460 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1461 the code which tracks sets/uses for delete_output_reload. */
1462 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1463 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1464 SUBREG_BYTE (operand0));
1465 operand0 = alter_subreg (&temp);
1469 && reload_in_progress && GET_CODE (operand1) == REG
1470 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1471 operand1 = reg_equiv_mem[REGNO (operand1)];
1472 else if (scratch_reg
1473 && reload_in_progress && GET_CODE (operand1) == SUBREG
1474 && GET_CODE (SUBREG_REG (operand1)) == REG
1475 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1477 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1478 the code which tracks sets/uses for delete_output_reload. */
1479 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1480 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1481 SUBREG_BYTE (operand1));
1482 operand1 = alter_subreg (&temp);
/* Pick up any address replacements recorded by reload.  */
1485 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1486 && ((tem = find_replacement (&XEXP (operand0, 0)))
1487 != XEXP (operand0, 0)))
1488 operand0 = replace_equiv_address (operand0, tem);
1490 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1491 && ((tem = find_replacement (&XEXP (operand1, 0)))
1492 != XEXP (operand1, 0)))
1493 operand1 = replace_equiv_address (operand1, tem);
1495 /* Handle secondary reloads for loads/stores of FP registers from
1496 REG+D addresses where D does not fit in 5 or 14 bits, including
1497 (subreg (mem (addr))) cases. */
1499 && fp_reg_operand (operand0, mode)
1500 && ((GET_CODE (operand1) == MEM
1501 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1502 XEXP (operand1, 0)))
1503 || ((GET_CODE (operand1) == SUBREG
1504 && GET_CODE (XEXP (operand1, 0)) == MEM
1505 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1507 XEXP (XEXP (operand1, 0), 0))))))
1509 if (GET_CODE (operand1) == SUBREG)
1510 operand1 = XEXP (operand1, 0);
1512 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1513 it in WORD_MODE regardless of what mode it was originally given
1515 scratch_reg = force_mode (word_mode, scratch_reg);
1517 /* D might not fit in 14 bits either; for such cases load D into
1519 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1521 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1522 emit_move_insn (scratch_reg,
1523 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1525 XEXP (XEXP (operand1, 0), 0),
1529 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1530 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1531 replace_equiv_address (operand1, scratch_reg)));
/* Mirror case: FP register source stored to an out-of-range REG+D
   destination address.  */
1534 else if (scratch_reg
1535 && fp_reg_operand (operand1, mode)
1536 && ((GET_CODE (operand0) == MEM
1537 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1539 XEXP (operand0, 0)))
1540 || ((GET_CODE (operand0) == SUBREG)
1541 && GET_CODE (XEXP (operand0, 0)) == MEM
1542 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1544 XEXP (XEXP (operand0, 0), 0)))))
1546 if (GET_CODE (operand0) == SUBREG)
1547 operand0 = XEXP (operand0, 0);
1549 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1550 it in WORD_MODE regardless of what mode it was originally given
1552 scratch_reg = force_mode (word_mode, scratch_reg);
1554 /* D might not fit in 14 bits either; for such cases load D into
1556 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1558 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1559 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1562 XEXP (XEXP (operand0, 0),
1567 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1568 emit_insn (gen_rtx_SET (VOIDmode,
1569 replace_equiv_address (operand0, scratch_reg),
1573 /* Handle secondary reloads for loads of FP registers from constant
1574 expressions by forcing the constant into memory.
1576 Use scratch_reg to hold the address of the memory location.
1578 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1579 NO_REGS when presented with a const_int and a register class
1580 containing only FP registers. Doing so unfortunately creates
1581 more problems than it solves. Fix this for 2.5. */
1582 else if (scratch_reg
1583 && CONSTANT_P (operand1)
1584 && fp_reg_operand (operand0, mode))
1586 rtx const_mem, xoperands[2];
1588 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1589 it in WORD_MODE regardless of what mode it was originally given
1591 scratch_reg = force_mode (word_mode, scratch_reg);
1593 /* Force the constant into memory and put the address of the
1594 memory location into scratch_reg. */
1595 const_mem = force_const_mem (mode, operand1);
1596 xoperands[0] = scratch_reg;
1597 xoperands[1] = XEXP (const_mem, 0);
1598 emit_move_sequence (xoperands, Pmode, 0);
1600 /* Now load the destination register. */
1601 emit_insn (gen_rtx_SET (mode, operand0,
1602 replace_equiv_address (const_mem, scratch_reg)));
1605 /* Handle secondary reloads for SAR. These occur when trying to load
1606 the SAR from memory, FP register, or with a constant. */
1607 else if (scratch_reg
1608 && GET_CODE (operand0) == REG
1609 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1610 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1611 && (GET_CODE (operand1) == MEM
1612 || GET_CODE (operand1) == CONST_INT
1613 || (GET_CODE (operand1) == REG
1614 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1616 /* D might not fit in 14 bits either; for such cases load D into
1618 if (GET_CODE (operand1) == MEM
1619 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1621 /* We are reloading the address into the scratch register, so we
1622 want to make sure the scratch register is a full register. */
1623 scratch_reg = force_mode (word_mode, scratch_reg);
1625 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1626 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1629 XEXP (XEXP (operand1, 0),
1633 /* Now we are going to load the scratch register from memory,
1634 we want to load it in the same width as the original MEM,
1635 which must be the same as the width of the ultimate destination,
1637 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1639 emit_move_insn (scratch_reg,
1640 replace_equiv_address (operand1, scratch_reg));
1644 /* We want to load the scratch register using the same mode as
1645 the ultimate destination. */
1646 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1648 emit_move_insn (scratch_reg, operand1);
1651 /* And emit the insn to set the ultimate destination. We know that
1652 the scratch register has the same mode as the destination at this
1654 emit_move_insn (operand0, scratch_reg);
1657 /* Handle the most common case: storing into a register. */
1658 else if (register_operand (operand0, mode))
1660 if (register_operand (operand1, mode)
1661 || (GET_CODE (operand1) == CONST_INT
1662 && cint_ok_for_move (INTVAL (operand1)))
1663 || (operand1 == CONST0_RTX (mode))
1664 || (GET_CODE (operand1) == HIGH
1665 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1666 /* Only `general_operands' can come here, so MEM is ok. */
1667 || GET_CODE (operand1) == MEM)
1669 /* Various sets are created during RTL generation which don't
1670 have the REG_POINTER flag correctly set. After the CSE pass,
1671 instruction recognition can fail if we don't consistently
1672 set this flag when performing register copies. This should
1673 also improve the opportunities for creating insns that use
1674 unscaled indexing. */
1675 if (REG_P (operand0) && REG_P (operand1))
1677 if (REG_POINTER (operand1)
1678 && !REG_POINTER (operand0)
1679 && !HARD_REGISTER_P (operand0))
1680 copy_reg_pointer (operand0, operand1);
1681 else if (REG_POINTER (operand0)
1682 && !REG_POINTER (operand1)
1683 && !HARD_REGISTER_P (operand1))
1684 copy_reg_pointer (operand1, operand0);
1687 /* When MEMs are broken out, the REG_POINTER flag doesn't
1688 get set. In some cases, we can set the REG_POINTER flag
1689 from the declaration for the MEM. */
1690 if (REG_P (operand0)
1691 && GET_CODE (operand1) == MEM
1692 && !REG_POINTER (operand0))
1694 tree decl = MEM_EXPR (operand1);
1696 /* Set the register pointer flag and register alignment
1697 if the declaration for this memory reference is a
1698 pointer type. Fortran indirect argument references
1701 && !(flag_argument_noalias > 1
1702 && TREE_CODE (decl) == INDIRECT_REF
1703 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1707 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1709 if (TREE_CODE (decl) == COMPONENT_REF)
1710 decl = TREE_OPERAND (decl, 1);
1712 type = TREE_TYPE (decl);
1713 type = strip_array_types (type);
1715 if (POINTER_TYPE_P (type))
1719 type = TREE_TYPE (type);
1720 /* Using TYPE_ALIGN_OK is rather conservative as
1721 only the ada frontend actually sets it. */
1722 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1724 mark_reg_pointer (operand0, align);
1729 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1733 else if (GET_CODE (operand0) == MEM)
1735 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1736 && !(reload_in_progress || reload_completed))
1738 rtx temp = gen_reg_rtx (DFmode);
1740 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1741 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1744 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1746 /* Run this case quickly. */
1747 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1))
1750 if (! (reload_in_progress || reload_completed))
1752 operands[0] = validize_mem (operand0);
1753 operands[1] = operand1 = force_reg (mode, operand1);
1757 /* Simplify the source if we need to.
1758 Note we do have to handle function labels here, even though we do
1759 not consider them legitimate constants. Loop optimizations can
1760 call the emit_move_xxx with one as a source. */
1761 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1762 || function_label_operand (operand1, mode)
1763 || (GET_CODE (operand1) == HIGH
1764 && symbolic_operand (XEXP (operand1, 0), mode)))
1768 if (GET_CODE (operand1) == HIGH)
1771 operand1 = XEXP (operand1, 0);
1773 if (symbolic_operand (operand1, mode))
1775 /* Argh. The assembler and linker can't handle arithmetic
1778 So we force the plabel into memory, load operand0 from
1779 the memory location, then add in the constant part. */
1780 if ((GET_CODE (operand1) == CONST
1781 && GET_CODE (XEXP (operand1, 0)) == PLUS
1782 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1783 || function_label_operand (operand1, mode))
1785 rtx temp, const_part;
1787 /* Figure out what (if any) scratch register to use. */
1788 if (reload_in_progress || reload_completed)
1790 scratch_reg = scratch_reg ? scratch_reg : operand0;
1791 /* SCRATCH_REG will hold an address and maybe the actual
1792 data. We want it in WORD_MODE regardless of what mode it
1793 was originally given to us. */
1794 scratch_reg = force_mode (word_mode, scratch_reg);
1797 scratch_reg = gen_reg_rtx (Pmode);
1799 if (GET_CODE (operand1) == CONST)
1801 /* Save away the constant part of the expression. */
1802 const_part = XEXP (XEXP (operand1, 0), 1);
1803 gcc_assert (GET_CODE (const_part) == CONST_INT);
1805 /* Force the function label into memory. */
1806 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1810 /* No constant part. */
1811 const_part = NULL_RTX;
1813 /* Force the function label into memory. */
1814 temp = force_const_mem (mode, operand1);
1818 /* Get the address of the memory location. PIC-ify it if
1820 temp = XEXP (temp, 0);
1822 temp = legitimize_pic_address (temp, mode, scratch_reg);
1824 /* Put the address of the memory location into our destination
1827 emit_move_sequence (operands, mode, scratch_reg);
1829 /* Now load from the memory location into our destination
1831 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1832 emit_move_sequence (operands, mode, scratch_reg);
1834 /* And add back in the constant part. */
1835 if (const_part != NULL_RTX)
1836 expand_inc (operand0, const_part);
1845 if (reload_in_progress || reload_completed)
1847 temp = scratch_reg ? scratch_reg : operand0;
1848 /* TEMP will hold an address and maybe the actual
1849 data. We want it in WORD_MODE regardless of what mode it
1850 was originally given to us. */
1851 temp = force_mode (word_mode, temp);
1854 temp = gen_reg_rtx (Pmode);
1856 /* (const (plus (symbol) (const_int))) must be forced to
1857 memory during/after reload if the const_int will not fit
1859 if (GET_CODE (operand1) == CONST
1860 && GET_CODE (XEXP (operand1, 0)) == PLUS
1861 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1862 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1863 && (reload_completed || reload_in_progress)
1866 rtx const_mem = force_const_mem (mode, operand1);
1867 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1869 operands[1] = replace_equiv_address (const_mem, operands[1]);
1870 emit_move_sequence (operands, mode, temp);
1874 operands[1] = legitimize_pic_address (operand1, mode, temp);
1875 if (REG_P (operand0) && REG_P (operands[1]))
1876 copy_reg_pointer (operand0, operands[1]);
1877 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1880 /* On the HPPA, references to data space are supposed to use dp,
1881 register 27, but showing it in the RTL inhibits various cse
1882 and loop optimizations. */
1887 if (reload_in_progress || reload_completed)
1889 temp = scratch_reg ? scratch_reg : operand0;
1890 /* TEMP will hold an address and maybe the actual
1891 data. We want it in WORD_MODE regardless of what mode it
1892 was originally given to us. */
1893 temp = force_mode (word_mode, temp);
1896 temp = gen_reg_rtx (mode);
1898 /* Loading a SYMBOL_REF into a register makes that register
1899 safe to be used as the base in an indexed address.
1901 Don't mark hard registers though. That loses. */
1902 if (GET_CODE (operand0) == REG
1903 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1904 mark_reg_pointer (operand0, BITS_PER_UNIT);
1905 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1906 mark_reg_pointer (temp, BITS_PER_UNIT);
1909 set = gen_rtx_SET (mode, operand0, temp);
1911 set = gen_rtx_SET (VOIDmode,
1913 gen_rtx_LO_SUM (mode, temp, operand1));
1915 emit_insn (gen_rtx_SET (VOIDmode,
1917 gen_rtx_HIGH (mode, operand1)));
/* TLS references need the TLS-specific legitimization path.  */
1923 else if (pa_tls_referenced_p (operand1))
1928 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1930 addend = XEXP (XEXP (tmp, 0), 1);
1931 tmp = XEXP (XEXP (tmp, 0), 0);
1934 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1935 tmp = legitimize_tls_address (tmp);
1938 tmp = gen_rtx_PLUS (mode, tmp, addend);
1939 tmp = force_operand (tmp, operands[0]);
/* Source is a constant that cannot be loaded with a single move insn;
   split it into a high part plus a low part (PLUS or HIGH/LO_SUM), and
   for wide values use insv/depdi sequences below.  */
1943 else if (GET_CODE (operand1) != CONST_INT
1944 || !cint_ok_for_move (INTVAL (operand1)))
1948 HOST_WIDE_INT value = 0;
1949 HOST_WIDE_INT insv = 0;
1952 if (GET_CODE (operand1) == CONST_INT)
1953 value = INTVAL (operand1);
1956 && GET_CODE (operand1) == CONST_INT
1957 && HOST_BITS_PER_WIDE_INT > 32
1958 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1962 /* Extract the low order 32 bits of the value and sign extend.
1963 If the new value is the same as the original value, we can
1964 can use the original value as-is. If the new value is
1965 different, we use it and insert the most-significant 32-bits
1966 of the original value into the final result. */
1967 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1968 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1971 #if HOST_BITS_PER_WIDE_INT > 32
1972 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1976 operand1 = GEN_INT (nval);
1980 if (reload_in_progress || reload_completed)
1981 temp = scratch_reg ? scratch_reg : operand0;
1983 temp = gen_reg_rtx (mode);
1985 /* We don't directly split DImode constants on 32-bit targets
1986 because PLUS uses an 11-bit immediate and the insn sequence
1987 generated is not as efficient as the one using HIGH/LO_SUM. */
1988 if (GET_CODE (operand1) == CONST_INT
1989 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1990 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1993 /* Directly break constant into high and low parts. This
1994 provides better optimization opportunities because various
1995 passes recognize constants split with PLUS but not LO_SUM.
1996 We use a 14-bit signed low part except when the addition
1997 of 0x4000 to the high part might change the sign of the
1999 HOST_WIDE_INT low = value & 0x3fff;
2000 HOST_WIDE_INT high = value & ~ 0x3fff;
2004 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2012 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2013 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2017 emit_insn (gen_rtx_SET (VOIDmode, temp,
2018 gen_rtx_HIGH (mode, operand1)));
2019 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2022 insn = emit_move_insn (operands[0], operands[1]);
2024 /* Now insert the most significant 32 bits of the value
2025 into the register. When we don't have a second register
2026 available, it could take up to nine instructions to load
2027 a 64-bit integer constant. Prior to reload, we force
2028 constants that would take more than three instructions
2029 to load to the constant pool. During and after reload,
2030 we have to handle all possible values. */
2033 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2034 register and the value to be inserted is outside the
2035 range that can be loaded with three depdi instructions. */
2036 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2038 operand1 = GEN_INT (insv);
2040 emit_insn (gen_rtx_SET (VOIDmode, temp,
2041 gen_rtx_HIGH (mode, operand1)));
2042 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2043 emit_insn (gen_insv (operand0, GEN_INT (32),
2048 int len = 5, pos = 27;
2050 /* Insert the bits using the depdi instruction. */
2053 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2054 HOST_WIDE_INT sign = v5 < 0;
2056 /* Left extend the insertion. */
2057 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2058 while (pos > 0 && (insv & 1) == sign)
2060 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2065 emit_insn (gen_insv (operand0, GEN_INT (len),
2066 GEN_INT (pos), GEN_INT (v5)));
2068 len = pos > 0 && pos < 5 ? pos : 5;
2074 set_unique_reg_note (insn, REG_EQUAL, op1);
2079 /* Now have insn-emit do whatever it normally does. */
2083 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2084 it will need a link/runtime reloc). */
/* Recursively examine tree EXP and return nonzero if it contains an
   ADDR_EXPR (i.e. will need a link/runtime reloc) -- see the comment
   above.  NOTE(review): the switch's other case labels, declarations and
   returns are elided in this view; only the recursive accumulation into
   `reloc' is visible.  */
2087 reloc_needed (tree exp)
2091 switch (TREE_CODE (exp))
2096 case POINTER_PLUS_EXPR:
2099 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2100 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2104 case NON_LVALUE_EXPR:
2105 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2111 unsigned HOST_WIDE_INT ix;
2113 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2115 reloc |= reloc_needed (value);
2128 /* Does operand (which is a symbolic_operand) live in text space?
2129 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
/* Return nonzero if symbolic OPERAND lives in read-only (text) space,
   using SYMBOL_REF_FLAG as set by pa_encode_section_info (see comment
   above).  NOTE(review): the surrounding condition that selects between
   the two SYMBOL_REF tests (lines 2137-2143 of the original) is elided
   here -- presumably a flag_pic check; confirm against the full file.  */
2133 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2135 if (GET_CODE (operand) == CONST)
2136 operand = XEXP (XEXP (operand, 0), 0);
2139 if (GET_CODE (operand) == SYMBOL_REF)
2140 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2144 if (GET_CODE (operand) == SYMBOL_REF)
2145 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2151 /* Return the best assembler insn template
2152 for moving operands[1] into operands[0] as a fullword. */
/* Return the assembler template that moves operands[1] into operands[0]
   as a single word (see comment above): stw for a MEM destination, ldo /
   ldil / zdepi / ldil+ldo for constants depending on their encoding, and
   a plain copy for register-to-register.  A CONST_DOUBLE SFmode source is
   first converted to the equivalent CONST_INT bit pattern.  NOTE(review):
   several lines (the MEM-source return, local declarations, the
   VAL_14_BITS_P return) are elided in this view.  */
2154 singlemove_string (rtx *operands)
2156 HOST_WIDE_INT intval;
2158 if (GET_CODE (operands[0]) == MEM)
2159 return "stw %r1,%0";
2160 if (GET_CODE (operands[1]) == MEM)
2162 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2167 gcc_assert (GET_MODE (operands[1]) == SFmode);
2169 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2171 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2172 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2174 operands[1] = GEN_INT (i);
2175 /* Fall through to CONST_INT case. */
2177 if (GET_CODE (operands[1]) == CONST_INT)
2179 intval = INTVAL (operands[1]);
2181 if (VAL_14_BITS_P (intval))
2183 else if ((intval & 0x7ff) == 0)
2184 return "ldil L'%1,%0";
2185 else if (zdepi_cint_p (intval))
2186 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2188 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2190 return "copy %1,%0";
2194 /* Compute position (in OP[1]) and width (in OP[2])
2195 useful for copying IMM to a register using the zdepi
2196 instructions. Store the immediate value to insert in OP[0]. */
/* Compute the operands for copying IMM into a register with the 32-bit
   zdepi instruction: the 5-bit immediate to insert in OP[0], its position
   in OP[1] and its width in OP[2] (see comment above).  NOTE(review):
   loop bodies, the OP[] stores and some declarations are elided in this
   view; the embedded line numbers are non-contiguous.  */
2198 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2202 /* Find the least significant set bit in IMM. */
2203 for (lsb = 0; lsb < 32; lsb++)
2210 /* Choose variants based on *sign* of the 5-bit field. */
2211 if ((imm & 0x10) == 0)
2212 len = (lsb <= 28) ? 4 : 32 - lsb;
2215 /* Find the width of the bitstring in IMM. */
2216 for (len = 5; len < 32; len++)
2218 if ((imm & (1 << len)) == 0)
2222 /* Sign extend IMM as a 5-bit value. */
2223 imm = (imm & 0xf) - 0x10;
2231 /* Compute position (in OP[1]) and width (in OP[2])
2232 useful for copying IMM to a register using the depdi,z
2233 instructions. Store the immediate value to insert in OP[0]. */
/* 64-bit counterpart of compute_zdepwi_operands: compute the operands for
   copying IMM into a register with the depdi,z instruction -- value in
   OP[0], position in OP[1], width in OP[2] (see comment above).  Uses
   HOST_BITS_PER_WIDE_INT in place of the hard-coded 32 of the wi variant.
   NOTE(review): loop bodies and the OP[] stores are elided in this view.  */
2235 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2237 HOST_WIDE_INT lsb, len;
2239 /* Find the least significant set bit in IMM. */
2240 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2247 /* Choose variants based on *sign* of the 5-bit field. */
2248 if ((imm & 0x10) == 0)
2249 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2250 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2253 /* Find the width of the bitstring in IMM. */
2254 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2256 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2260 /* Sign extend IMM as a 5-bit value. */
2261 imm = (imm & 0xf) - 0x10;
2269 /* Output assembler code to perform a doubleword move insn
2270 with operands OPERANDS. */
2273 output_move_double (rtx *operands)
2275 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2277 rtx addreg0 = 0, addreg1 = 0;
2279 /* First classify both operands. */
2281 if (REG_P (operands[0]))
2283 else if (offsettable_memref_p (operands[0]))
2285 else if (GET_CODE (operands[0]) == MEM)
2290 if (REG_P (operands[1]))
2292 else if (CONSTANT_P (operands[1]))
2294 else if (offsettable_memref_p (operands[1]))
2296 else if (GET_CODE (operands[1]) == MEM)
2301 /* Check for the cases that the operand constraints are not
2302 supposed to allow to happen. */
2303 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2305 /* Handle copies between general and floating registers. */
2307 if (optype0 == REGOP && optype1 == REGOP
2308 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2310 if (FP_REG_P (operands[0]))
2312 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2313 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2314 return "{fldds|fldd} -16(%%sp),%0";
2318 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2319 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2320 return "{ldws|ldw} -12(%%sp),%R0";
2324 /* Handle auto decrementing and incrementing loads and stores
2325 specifically, since the structure of the function doesn't work
2326 for them without major modification. Do it better when we learn
2327 this port about the general inc/dec addressing of PA.
2328 (This was written by tege. Chide him if it doesn't work.) */
2330 if (optype0 == MEMOP)
2332 /* We have to output the address syntax ourselves, since print_operand
2333 doesn't deal with the addresses we want to use. Fix this later. */
2335 rtx addr = XEXP (operands[0], 0);
2336 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2338 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2340 operands[0] = XEXP (addr, 0);
2341 gcc_assert (GET_CODE (operands[1]) == REG
2342 && GET_CODE (operands[0]) == REG);
2344 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2346 /* No overlap between high target register and address
2347 register. (We do this in a non-obvious way to
2348 save a register file writeback) */
2349 if (GET_CODE (addr) == POST_INC)
2350 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2351 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2353 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2355 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2357 operands[0] = XEXP (addr, 0);
2358 gcc_assert (GET_CODE (operands[1]) == REG
2359 && GET_CODE (operands[0]) == REG);
2361 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2362 /* No overlap between high target register and address
2363 register. (We do this in a non-obvious way to save a
2364 register file writeback) */
2365 if (GET_CODE (addr) == PRE_INC)
2366 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2367 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2370 if (optype1 == MEMOP)
2372 /* We have to output the address syntax ourselves, since print_operand
2373 doesn't deal with the addresses we want to use. Fix this later. */
2375 rtx addr = XEXP (operands[1], 0);
2376 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2378 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2380 operands[1] = XEXP (addr, 0);
2381 gcc_assert (GET_CODE (operands[0]) == REG
2382 && GET_CODE (operands[1]) == REG);
2384 if (!reg_overlap_mentioned_p (high_reg, addr))
2386 /* No overlap between high target register and address
2387 register. (We do this in a non-obvious way to
2388 save a register file writeback) */
2389 if (GET_CODE (addr) == POST_INC)
2390 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2391 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2395 /* This is an undefined situation. We should load into the
2396 address register *and* update that register. Probably
2397 we don't need to handle this at all. */
2398 if (GET_CODE (addr) == POST_INC)
2399 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2400 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2403 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2405 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2407 operands[1] = XEXP (addr, 0);
2408 gcc_assert (GET_CODE (operands[0]) == REG
2409 && GET_CODE (operands[1]) == REG);
2411 if (!reg_overlap_mentioned_p (high_reg, addr))
2413 /* No overlap between high target register and address
2414 register. (We do this in a non-obvious way to
2415 save a register file writeback) */
2416 if (GET_CODE (addr) == PRE_INC)
2417 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2418 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2422 /* This is an undefined situation. We should load into the
2423 address register *and* update that register. Probably
2424 we don't need to handle this at all. */
2425 if (GET_CODE (addr) == PRE_INC)
2426 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2427 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2430 else if (GET_CODE (addr) == PLUS
2431 && GET_CODE (XEXP (addr, 0)) == MULT)
2434 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2436 if (!reg_overlap_mentioned_p (high_reg, addr))
2438 xoperands[0] = high_reg;
2439 xoperands[1] = XEXP (addr, 1);
2440 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2441 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2442 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2444 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2448 xoperands[0] = high_reg;
2449 xoperands[1] = XEXP (addr, 1);
2450 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2451 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2452 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2454 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2459 /* If an operand is an unoffsettable memory ref, find a register
2460 we can increment temporarily to make it refer to the second word. */
2462 if (optype0 == MEMOP)
2463 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2465 if (optype1 == MEMOP)
2466 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2468 /* Ok, we can do one word at a time.
2469 Normally we do the low-numbered word first.
2471 In either case, set up in LATEHALF the operands to use
2472 for the high-numbered word and in some cases alter the
2473 operands in OPERANDS to be suitable for the low-numbered word. */
2475 if (optype0 == REGOP)
2476 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2477 else if (optype0 == OFFSOP)
2478 latehalf[0] = adjust_address (operands[0], SImode, 4);
2480 latehalf[0] = operands[0];
2482 if (optype1 == REGOP)
2483 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2484 else if (optype1 == OFFSOP)
2485 latehalf[1] = adjust_address (operands[1], SImode, 4);
2486 else if (optype1 == CNSTOP)
2487 split_double (operands[1], &operands[1], &latehalf[1]);
2489 latehalf[1] = operands[1];
2491 /* If the first move would clobber the source of the second one,
2492 do them in the other order.
2494 This can happen in two cases:
2496 mem -> register where the first half of the destination register
2497 is the same register used in the memory's address. Reload
2498 can create such insns.
2500 mem in this case will be either register indirect or register
2501 indirect plus a valid offset.
2503 register -> register move where REGNO(dst) == REGNO(src + 1)
2504 someone (Tim/Tege?) claimed this can happen for parameter loads.
2506 Handle mem -> register case first. */
2507 if (optype0 == REGOP
2508 && (optype1 == MEMOP || optype1 == OFFSOP)
2509 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2512 /* Do the late half first. */
2514 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2515 output_asm_insn (singlemove_string (latehalf), latehalf);
2519 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2520 return singlemove_string (operands);
2523 /* Now handle register -> register case. */
2524 if (optype0 == REGOP && optype1 == REGOP
2525 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2527 output_asm_insn (singlemove_string (latehalf), latehalf);
2528 return singlemove_string (operands);
2531 /* Normal case: do the two words, low-numbered first. */
2533 output_asm_insn (singlemove_string (operands), operands);
2535 /* Make any unoffsettable addresses point at high-numbered word. */
2537 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2539 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2542 output_asm_insn (singlemove_string (latehalf), latehalf);
2544 /* Undo the adds we just did. */
2546 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2548 output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Emit the assembler text for a 64-bit floating-point move.  Three cases
   are handled: FP reg <- FP reg (or constant zero) via fcpy; memory <->
   FP reg via fldd/fstd; and the fallback of zeroing a general register
   pair.  Returns the final insn template string (callers pass it back to
   the output machinery).  */
2554 output_fp_move_double (rtx *operands)
2556   if (FP_REG_P (operands[0]))
2558       if (FP_REG_P (operands[1])
2559 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2560 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2562 	output_asm_insn ("fldd%F1 %1,%0", operands);
2564   else if (FP_REG_P (operands[1]))
2566       output_asm_insn ("fstd%F0 %1,%0", operands);
/* Neither operand is an FP register: the only remaining legal source is
   constant zero, stored into a general register pair with two copies
   of %r0 (the hardwired zero register).  */
2572       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2574       /* This is a pain.  You have to be prepared to deal with an
2575 	 arbitrary address here including pre/post increment/decrement.
2577 	 so avoid this in the MD.  */
2578       gcc_assert (GET_CODE (operands[0]) == REG);
2580       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2581       xoperands[0] = operands[0];
2582       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2587 /* Return a REG that occurs in ADDR with coefficient 1.
2588    ADDR can be effectively incremented by incrementing REG.  */
/* Walks down a (possibly nested) PLUS chain, always descending into the
   REG side or past a constant term, until a bare REG is reached.  Aborts
   (gcc_assert) if the address bottoms out at anything else.  */
2591 find_addr_reg (rtx addr)
2593   while (GET_CODE (addr) == PLUS)
2595       if (GET_CODE (XEXP (addr, 0)) == REG)
2596 	addr = XEXP (addr, 0);
2597       else if (GET_CODE (XEXP (addr, 1)) == REG)
2598 	addr = XEXP (addr, 1);
2599       else if (CONSTANT_P (XEXP (addr, 0)))
2600 	addr = XEXP (addr, 1);
2601       else if (CONSTANT_P (XEXP (addr, 1)))
2602 	addr = XEXP (addr, 0);
/* Anything other than REG here means the address was not of a form this
   helper supports.  */
2606   gcc_assert (GET_CODE (addr) == REG);
2610 /* Emit code to perform a block move.
2612    OPERANDS[0] is the destination pointer as a REG, clobbered.
2613    OPERANDS[1] is the source pointer as a REG, clobbered.
2614    OPERANDS[2] is a register for temporary storage.
2615    OPERANDS[3] is a register for temporary storage.
2616    OPERANDS[4] is the size as a CONST_INT
2617    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2618    OPERANDS[6] is another temporary register.  */
/* The copy unit (8, 4, 2 or 1 bytes) is selected from the alignment;
   each case emits an unrolled-by-two load/store loop followed by code
   for the residual bytes.  The "addib,>= -N,%2,.-12" insns decrement the
   byte counter and branch back 12 bytes (three 4-byte insns) while it
   stays non-negative.  */
2621 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2623   int align = INTVAL (operands[5]);
2624   unsigned long n_bytes = INTVAL (operands[4]);
2626   /* We can't move more than a word at a time because the PA
2627      has no longer integer move insns.  (Could use fp mem ops?)  */
2628   if (align > (TARGET_64BIT ? 8 : 4))
2629     align = (TARGET_64BIT ? 8 : 4);
2631   /* Note that we know each loop below will execute at least twice
2632      (else we would have open-coded the copy).  */
/* Doubleword (8-byte) copy loop — only reachable when TARGET_64BIT.  */
2636 	/* Pre-adjust the loop counter.  */
2637 	operands[4] = GEN_INT (n_bytes - 16);
2638 	output_asm_insn ("ldi %4,%2", operands);
2641 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2642 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2643 	output_asm_insn ("std,ma %3,8(%0)", operands);
2644 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2645 	output_asm_insn ("std,ma %6,8(%0)", operands);
2647 	/* Handle the residual.  There could be up to 7 bytes of
2648 	   residual to copy!  */
2649 	if (n_bytes % 16 != 0)
2651 	    operands[4] = GEN_INT (n_bytes % 8);
2652 	    if (n_bytes % 16 >= 8)
2653 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2654 	    if (n_bytes % 8 != 0)
2655 	      output_asm_insn ("ldd 0(%1),%6", operands);
2656 	    if (n_bytes % 16 >= 8)
2657 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2658 	    if (n_bytes % 8 != 0)
2659 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
/* Word (4-byte) copy loop.  */
2664 	/* Pre-adjust the loop counter.  */
2665 	operands[4] = GEN_INT (n_bytes - 8);
2666 	output_asm_insn ("ldi %4,%2", operands);
2669 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2670 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2671 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2672 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2673 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2675 	/* Handle the residual.  There could be up to 7 bytes of
2676 	   residual to copy!  */
2677 	if (n_bytes % 8 != 0)
2679 	    operands[4] = GEN_INT (n_bytes % 4);
2680 	    if (n_bytes % 8 >= 4)
2681 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2682 	    if (n_bytes % 4 != 0)
2683 	      output_asm_insn ("ldw 0(%1),%6", operands);
2684 	    if (n_bytes % 8 >= 4)
2685 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2686 	    if (n_bytes % 4 != 0)
2687 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
/* Halfword (2-byte) copy loop.  */
2692 	/* Pre-adjust the loop counter.  */
2693 	operands[4] = GEN_INT (n_bytes - 4);
2694 	output_asm_insn ("ldi %4,%2", operands);
2697 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2698 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2699 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2700 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2701 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2703 	/* Handle the residual.  */
2704 	if (n_bytes % 4 != 0)
2706 	    if (n_bytes % 4 >= 2)
2707 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2708 	    if (n_bytes % 2 != 0)
2709 	      output_asm_insn ("ldb 0(%1),%6", operands);
2710 	    if (n_bytes % 4 >= 2)
2711 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2712 	    if (n_bytes % 2 != 0)
2713 	      output_asm_insn ("stb %6,0(%0)", operands);
/* Byte copy loop — the unaligned fallback.  */
2718 	/* Pre-adjust the loop counter.  */
2719 	operands[4] = GEN_INT (n_bytes - 2);
2720 	output_asm_insn ("ldi %4,%2", operands);
2723 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2724 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2725 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2726 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2727 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2729 	/* Handle the residual.  */
2730 	if (n_bytes % 2 != 0)
2732 	    output_asm_insn ("ldb 0(%1),%3", operands);
2733 	    output_asm_insn ("stb %3,0(%0)", operands);
2742 /* Count the number of insns necessary to handle this block move.
2744    Basic structure is the same as emit_block_move, except that we
2745    count insns rather than emit them.  */
/* INSN is the movmem pattern; the alignment and byte count are pulled
   from operands 7 and 6 of its PARALLEL.  The returned value is a length
   in bytes (each PA insn is 4 bytes), and must mirror exactly what
   output_block_move would emit.  */
2748 compute_movmem_length (rtx insn)
2750   rtx pat = PATTERN (insn);
2751   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2752   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2753   unsigned int n_insns = 0;
2755   /* We can't move more than four bytes at a time because the PA
2756      has no longer integer move insns.  (Could use fp mem ops?)  */
2757   if (align > (TARGET_64BIT ? 8 : 4))
2758     align = (TARGET_64BIT ? 8 : 4);
2760   /* The basic copying loop.  */
/* Residual accounting: one load/store pair per leftover unit, matching
   the residual cases in output_block_move.  */
2764   if (n_bytes % (2 * align) != 0)
2766       if ((n_bytes % (2 * align)) >= align)
2769       if ((n_bytes % align) != 0)
2773   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2777 /* Emit code to perform a block clear.
2779    OPERANDS[0] is the destination pointer as a REG, clobbered.
2780    OPERANDS[1] is a register for temporary storage.
2781    OPERANDS[2] is the size as a CONST_INT
2782    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
/* Same structure as output_block_move, but stores the hardwired zero
   register %r0 instead of loaded data, so no loads are needed and the
   loop bodies are half the size (hence the ".-4" branch-back targets:
   one 4-byte insn).  */
2785 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2787   int align = INTVAL (operands[3]);
2788   unsigned long n_bytes = INTVAL (operands[2]);
2790   /* We can't clear more than a word at a time because the PA
2791      has no longer integer move insns.  */
2792   if (align > (TARGET_64BIT ? 8 : 4))
2793     align = (TARGET_64BIT ? 8 : 4);
2795   /* Note that we know each loop below will execute at least twice
2796      (else we would have open-coded the copy).  */
/* Doubleword (8-byte) clear loop — only reachable when TARGET_64BIT.  */
2800 	/* Pre-adjust the loop counter.  */
2801 	operands[2] = GEN_INT (n_bytes - 16);
2802 	output_asm_insn ("ldi %2,%1", operands);
2805 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2806 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
2807 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2809 	/* Handle the residual.  There could be up to 7 bytes of
2810 	   residual to copy!  */
2811 	if (n_bytes % 16 != 0)
2813 	    operands[2] = GEN_INT (n_bytes % 8);
2814 	    if (n_bytes % 16 >= 8)
2815 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
2816 	    if (n_bytes % 8 != 0)
2817 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
/* Word (4-byte) clear loop.  */
2822 	/* Pre-adjust the loop counter.  */
2823 	operands[2] = GEN_INT (n_bytes - 8);
2824 	output_asm_insn ("ldi %2,%1", operands);
2827 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2828 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
2829 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2831 	/* Handle the residual.  There could be up to 7 bytes of
2832 	   residual to copy!  */
2833 	if (n_bytes % 8 != 0)
2835 	    operands[2] = GEN_INT (n_bytes % 4);
2836 	    if (n_bytes % 8 >= 4)
2837 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2838 	    if (n_bytes % 4 != 0)
2839 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
/* Halfword (2-byte) clear loop.  */
2844 	/* Pre-adjust the loop counter.  */
2845 	operands[2] = GEN_INT (n_bytes - 4);
2846 	output_asm_insn ("ldi %2,%1", operands);
2849 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2850 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
2851 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2853 	/* Handle the residual.  */
2854 	if (n_bytes % 4 != 0)
2856 	    if (n_bytes % 4 >= 2)
2857 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2858 	    if (n_bytes % 2 != 0)
2859 	      output_asm_insn ("stb %%r0,0(%0)", operands);
/* Byte clear loop — the unaligned fallback.  */
2864 	/* Pre-adjust the loop counter.  */
2865 	operands[2] = GEN_INT (n_bytes - 2);
2866 	output_asm_insn ("ldi %2,%1", operands);
2869 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2870 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
2871 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2873 	/* Handle the residual.  */
2874 	if (n_bytes % 2 != 0)
2875 	  output_asm_insn ("stb %%r0,0(%0)", operands);
2884 /* Count the number of insns necessary to handle this block clear.
2886    Basic structure is the same as output_block_clear, except that we
2887    count insns rather than emit them.  */
/* INSN is the clrmem pattern; alignment and byte count come from
   operands 4 and 3 of its PARALLEL.  Result is a length in bytes
   (4 bytes per PA insn) and must mirror output_block_clear exactly.  */
2890 compute_clrmem_length (rtx insn)
2892   rtx pat = PATTERN (insn);
2893   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2894   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2895   unsigned int n_insns = 0;
2897   /* We can't clear more than a word at a time because the PA
2898      has no longer integer move insns.  */
2899   if (align > (TARGET_64BIT ? 8 : 4))
2900     align = (TARGET_64BIT ? 8 : 4);
2902   /* The basic loop.  */
/* Residual accounting: one store per leftover unit, matching the
   residual cases in output_block_clear.  */
2906   if (n_bytes % (2 * align) != 0)
2908       if ((n_bytes % (2 * align)) >= align)
2911       if ((n_bytes % align) != 0)
2915   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
/* Return a string to perform a 32-bit bitwise AND of operands[1] with
   constant operands[2], storing into operands[0].  For masks whose zero
   bits form a single contiguous field, a one-insn extract (extru) or
   deposit-zero (depi) is used instead of a generic "and".  */
2921 output_and (rtx *operands)
2923   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2925       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2926       int ls0, ls1, ms0, p, len;
/* ls0 = lowest clear bit; ls1 = lowest set bit above ls0;
   ms0 = lowest clear bit above ls1.  */
2928       for (ls0 = 0; ls0 < 32; ls0++)
2929 	if ((mask & (1 << ls0)) == 0)
2932       for (ls1 = ls0; ls1 < 32; ls1++)
2933 	if ((mask & (1 << ls1)) != 0)
2936       for (ms0 = ls1; ms0 < 32; ms0++)
2937 	if ((mask & (1 << ms0)) == 0)
/* The mask must contain exactly one contiguous run of zero bits,
   otherwise a single extract/deposit can't implement it.  */
2940       gcc_assert (ms0 == 32);
2948 	  operands[2] = GEN_INT (len);
2949 	  return "{extru|extrw,u} %1,31,%2,%0";
2953 	  /* We could use this `depi' for the case above as well, but `depi'
2954 	     requires one more register file access than an `extru'.  */
2959 	  operands[2] = GEN_INT (p);
2960 	  operands[3] = GEN_INT (len);
2961 	  return "{depi|depwi} 0,%2,%3,%0";
/* Non-constant (or zero) mask: fall back to a plain AND.  */
2965     return "and %1,%2,%0";
2968 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2969    storing the result in operands[0].  */
/* 64-bit analogue of output_and: single contiguous runs of zero bits in
   the constant mask become one extrd,u (extract) or depdi (deposit-zero)
   insn; anything else falls back to a plain AND.  */
2971 output_64bit_and (rtx *operands)
2973   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2975       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2976       int ls0, ls1, ms0, p, len;
/* ls0 = lowest clear bit; ls1 = lowest set bit above ls0;
   ms0 = lowest clear bit above ls1.  */
2978       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2979 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2982       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2983 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2986       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2987 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
/* Exactly one contiguous run of zeros is required.  */
2990       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2992       if (ls1 == HOST_BITS_PER_WIDE_INT)
2998 	  operands[2] = GEN_INT (len);
2999 	  return "extrd,u %1,63,%2,%0";
3003 	  /* We could use this `depi' for the case above as well, but `depi'
3004 	     requires one more register file access than an `extru'.  */
3009 	  operands[2] = GEN_INT (p);
3010 	  operands[3] = GEN_INT (len);
3011 	  return "depdi 0,%2,%3,%0";
/* Non-constant (or zero) mask: fall back to a plain AND.  */
3015     return "and %1,%2,%0";
/* Return a string to perform a 32-bit bitwise OR of operands[1] with
   constant operands[2], storing into operands[0].  The constant's set
   bits must form one contiguous field, which is filled with a single
   deposit-immediate (depi -1) insn.  */
3019 output_ior (rtx *operands)
3021   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3022   int bs0, bs1, p, len;
3024   if (INTVAL (operands[2]) == 0)
3025     return "copy %1,%0";
/* bs0 = lowest set bit; bs1 = lowest clear bit above bs0.  */
3027   for (bs0 = 0; bs0 < 32; bs0++)
3028     if ((mask & (1 << bs0)) != 0)
3031   for (bs1 = bs0; bs1 < 32; bs1++)
3032     if ((mask & (1 << bs1)) == 0)
/* All set bits must lie below bs1, i.e. form one contiguous run.  */
3035   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3040   operands[2] = GEN_INT (p);
3041   operands[3] = GEN_INT (len);
3042   return "{depi|depwi} -1,%2,%3,%0";
3045 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3046    storing the result in operands[0].  */
/* 64-bit analogue of output_ior: the constant's set bits must form one
   contiguous field, filled with a single depdi -1 insn.  */
3048 output_64bit_ior (rtx *operands)
3050   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3051   int bs0, bs1, p, len;
3053   if (INTVAL (operands[2]) == 0)
3054     return "copy %1,%0";
/* bs0 = lowest set bit; bs1 = lowest clear bit above bs0.  */
3056   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3057     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3060   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3061     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
/* All set bits must lie below bs1, i.e. form one contiguous run.  */
3064   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3065 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3070   operands[2] = GEN_INT (p);
3071   operands[3] = GEN_INT (len);
3072   return "depdi -1,%2,%3,%0";
3075 /* Target hook for assembling integer objects.  This code handles
3076    aligned SI and DI integers specially since function references
3077    must be preceded by P%.  */
/* X is the value, SIZE its byte width, ALIGNED_P whether it is aligned.
   Returns true on success; anything not matching the special case is
   delegated to default_assemble_integer.  */
3080 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3082   if (size == UNITS_PER_WORD
3084       && function_label_operand (x, VOIDmode))
/* Function labels get the P% plabel prefix required by the PA ABI
   so the linker creates a procedure label.  */
3086       fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3087       output_addr_const (asm_out_file, x);
3088       fputc ('\n', asm_out_file);
3091   return default_assemble_integer (x, size, aligned_p);
3094 /* Output an ascii string.  */
/* Emits P[0..SIZE-1] to FILE as one or more .STRING directives, escaping
   quote/backslash and emitting non-printable bytes as \xNN, while keeping
   each assembler input line under the HP assembler's limit.  */
3096 output_ascii (FILE *file, const char *p, int size)
3100   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3102   /* The HP assembler can only take strings of 256 characters at one
3103      time.  This is a limitation on input line length, *not* the
3104      length of the string.  Sigh.  Even worse, it seems that the
3105      restriction is in number of input characters (see \xnn &
3106      \whatever).  So we have to do this very carefully.  */
3108   fputs ("\t.STRING \"", file);
3111   for (i = 0; i < size; i += 4)
/* Process up to 4 source bytes per iteration; CO counts the escaped
   output characters accumulated in partial_output.  */
3115       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3117 	  register unsigned int c = (unsigned char) p[i + io];
3119 	  if (c == '\"' || c == '\\')
3120 	    partial_output[co++] = '\\';
3121 	  if (c >= ' ' && c < 0177)
3122 	    partial_output[co++] = c;
/* Non-printable byte: emit as two-digit hex escape \xNN.  */
3126 	      partial_output[co++] = '\\';
3127 	      partial_output[co++] = 'x';
3128 	      hexd =  c  / 16 - 0 + '0';
3130 		hexd -= '9' - 'a' + 1;
3131 	      partial_output[co++] = hexd;
3132 	      hexd =  c % 16 - 0 + '0';
3134 		hexd -= '9' - 'a' + 1;
3135 	      partial_output[co++] = hexd;
/* Start a new .STRING before the assembler line gets too long
   (243 leaves headroom below the 256-character limit).  */
3138       if (chars_output + co > 243)
3140 	  fputs ("\"\n\t.STRING \"", file);
3143       fwrite (partial_output, 1, (size_t) co, file);
3147   fputs ("\"\n", file);
3150 /* Try to rewrite floating point comparisons & branches to avoid
3151    useless add,tr insns.
3153    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3154    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3155    first attempt to remove useless add,tr insns.  It is zero
3156    for the second pass as reorg sometimes leaves bogus REG_DEAD
3159    When CHECK_NOTES is zero we can only eliminate add,tr insns
3160    when there's a 1:1 correspondence between fcmp and ftest/fbranch
/* Two passes over the insn stream: the first counts fcmp insns (SETs of
   CCFP, which is register 0 here) and fbranch insns; the second finds
   each fcmp whose result feeds a reversed FP conditional branch and,
   when safe, reverses both the branch arms and the comparison code so
   the branch becomes a normal (non-add,tr) conditional.  */
3163 remove_useless_addtr_insns (int check_notes)
3166   static int pass = 0;
3168   /* This is fairly cheap, so always run it when optimizing.  */
3172       int fbranch_count = 0;
3174       /* Walk all the insns in this function looking for fcmp & fbranch
3175 	 instructions.  Keep track of how many of each we find.  */
3176       for (insn = get_insns (); insn; insn = next_insn (insn))
3180 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3181 	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3184 	  tmp = PATTERN (insn);
3186 	  /* It must be a set.  */
3187 	  if (GET_CODE (tmp) != SET)
3190 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3191 	  tmp = SET_DEST (tmp);
3192 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3198 	  tmp = PATTERN (insn);
3199 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3200 	  if (GET_CODE (tmp) == SET
3201 	      && SET_DEST (tmp) == pc_rtx
3202 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3203 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3204 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3205 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3213       /* Find all floating point compare + branch insns.  If possible,
3214 	 reverse the comparison & the branch to avoid add,tr insns.  */
3215       for (insn = get_insns (); insn; insn = next_insn (insn))
3219 	  /* Ignore anything that isn't an INSN.  */
3220 	  if (GET_CODE (insn) != INSN)
3223 	  tmp = PATTERN (insn);
3225 	  /* It must be a set.  */
3226 	  if (GET_CODE (tmp) != SET)
3229 	  /* The destination must be CCFP, which is register zero.  */
3230 	  tmp = SET_DEST (tmp);
3231 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3234 	  /* INSN should be a set of CCFP.
3236 	     See if the result of this insn is used in a reversed FP
3237 	     conditional branch.  If so, reverse our condition and
3238 	     the branch.  Doing so avoids useless add,tr insns.  */
3239 	  next = next_insn (insn);
3242 	      /* Jumps, calls and labels stop our search.  */
3243 	      if (GET_CODE (next) == JUMP_INSN
3244 		  || GET_CODE (next) == CALL_INSN
3245 		  || GET_CODE (next) == CODE_LABEL)
3248 	      /* As does another fcmp insn.  */
3249 	      if (GET_CODE (next) == INSN
3250 		  && GET_CODE (PATTERN (next)) == SET
3251 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3252 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3255 	      next = next_insn (next);
3258 	  /* Is NEXT_INSN a branch?  */
3260 	      && GET_CODE (next) == JUMP_INSN)
3262 	      rtx pattern = PATTERN (next);
3264 	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
3265 		 and CCFP dies, then reverse our conditional and the branch
3266 		 to avoid the add,tr.  */
3267 	      if (GET_CODE (pattern) == SET
3268 		  && SET_DEST (pattern) == pc_rtx
3269 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3270 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3271 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3272 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3273 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3274 		  && (fcmp_count == fbranch_count
3276 			  && find_regno_note (next, REG_DEAD, 0))))
3278 		  /* Reverse the branch.  */
3279 		  tmp = XEXP (SET_SRC (pattern), 1);
3280 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3281 		  XEXP (SET_SRC (pattern), 2) = tmp;
/* Force re-recognition of the modified branch insn.  */
3282 		  INSN_CODE (next) = -1;
3284 		  /* Reverse our condition.  */
3285 		  tmp = PATTERN (insn);
3286 		  PUT_CODE (XEXP (tmp, 1),
3287 			    (reverse_condition_maybe_unordered
3288 			     (GET_CODE (XEXP (tmp, 1)))));
3298 /* You may have trouble believing this, but this is the 32 bit HP-PA
3303 Variable arguments (optional; any number may be allocated)
3305 SP-(4*(N+9)) arg word N
3310 Fixed arguments (must be allocated; may remain unused)
3319 SP-32 External Data Pointer (DP)
3321 SP-24 External/stub RP (RP')
3325 SP-8 Calling Stub RP (RP'')
3330 SP-0 Stack Pointer (points to next available address)
3334 /* This function saves registers as follows. Registers marked with ' are
3335 this function's registers (as opposed to the previous function's).
3336 If a frame_pointer isn't needed, r4 is saved as a general register;
3337 the space for the frame pointer is still allocated, though, to keep
3343 SP (FP') Previous FP
3344 SP + 4 Alignment filler (sigh)
3345 SP + 8 Space for locals reserved here.
3349 SP + n All call saved register used.
3353 SP + o All call saved fp registers used.
3357 SP + p (SP') points to next available address.
3361 /* Global variables set by output_function_prologue().  */
3362 /* Size of frame.  Need to know this to emit return insns from
3364 static HOST_WIDE_INT actual_fsize, local_fsize;
/* Nonzero when any callee-saved FP registers are saved in the frame.  */
3365 static int save_fregs;
3367 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3368    Handle case where DISP > 8k by using the add_high_const patterns.
3370    Note in DISP > 8k case, we will leave the high part of the address
3371    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
/* Three address-generation strategies by displacement size: a direct
   14-bit displacement; a full 64-bit constant built in %r1 (TARGET_64BIT
   only); or HIGH/LO_SUM via %r1.  Frame notes are attached where the
   emitted address arithmetic must be visible to the unwinder.  */
3374 store_reg (int reg, HOST_WIDE_INT disp, int base)
3376   rtx insn, dest, src, basereg;
3378   src = gen_rtx_REG (word_mode, reg);
3379   basereg = gen_rtx_REG (Pmode, base);
3380   if (VAL_14_BITS_P (disp))
/* DISP fits the 14-bit displacement field: a plain store works.  */
3382       dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3383       insn = emit_move_insn (dest, src);
3385   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
/* Huge 64-bit displacement: materialize it in the scratch %r1 and add
   the base register into it.  */
3387       rtx delta = GEN_INT (disp);
3388       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3390       emit_move_insn (tmpreg, delta);
3391       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3394 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3395 			gen_rtx_SET (VOIDmode, tmpreg,
3396 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3397 	  RTX_FRAME_RELATED_P (insn) = 1;
3399       dest = gen_rtx_MEM (word_mode, tmpreg);
3400       insn = emit_move_insn (dest, src);
/* Medium displacement: HIGH part into %r1, LO_SUM in the store address.
   The high part is deliberately left live in %r1 (see header comment).  */
3404       rtx delta = GEN_INT (disp);
3405       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3406       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3408       emit_move_insn (tmpreg, high);
3409       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3410       insn = emit_move_insn (dest, src);
3412 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3413 			gen_rtx_SET (VOIDmode,
3414 				     gen_rtx_MEM (word_mode,
3415 						  gen_rtx_PLUS (word_mode,
3422     RTX_FRAME_RELATED_P (insn) = 1;
3425 /* Emit RTL to store REG at the memory location specified by BASE and then
3426    add MOD to BASE.  MOD must be <= 8k.  */
/* Emits a post-modify store (store REG through BASE, then BASE += MOD)
   and, when frame notes are in use, marks the insn and both SETs of its
   PARALLEL as frame-related for the unwinder.  */
3429 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3431   rtx insn, basereg, srcreg, delta;
/* MOD must fit the 14-bit displacement of the post-modify store.  */
3433   gcc_assert (VAL_14_BITS_P (mod));
3435   basereg = gen_rtx_REG (Pmode, base);
3436   srcreg = gen_rtx_REG (word_mode, reg);
3437   delta = GEN_INT (mod);
3439   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3442       RTX_FRAME_RELATED_P (insn) = 1;
3444       /* RTX_FRAME_RELATED_P must be set on each frame related set
3445 	 in a parallel with more than one element.  */
3446       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3447       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3451 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3452    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3453    whether to add a frame note or not.
3455    In the DISP > 8k case, we leave the high part of the address in %r1.
3456    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
/* Same three displacement strategies as store_reg, but producing
   REG = BASE + DISP rather than a store.  */
3459 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3463   if (VAL_14_BITS_P (disp))
/* DISP fits in 14 bits: a single add-immediate.  */
3465       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3466 			     plus_constant (gen_rtx_REG (Pmode, base), disp));
3468   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
/* Huge 64-bit displacement: build it in the scratch %r1 first.  */
3470       rtx basereg = gen_rtx_REG (Pmode, base);
3471       rtx delta = GEN_INT (disp);
3472       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3474       emit_move_insn (tmpreg, delta);
3475       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3476 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3478 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3479 			gen_rtx_SET (VOIDmode, tmpreg,
3480 				     gen_rtx_PLUS (Pmode, basereg, delta)));
/* Medium displacement: HIGH part into %r1, then LO_SUM; %r1 keeps the
   high part afterwards (see header comment).  */
3484       rtx basereg = gen_rtx_REG (Pmode, base);
3485       rtx delta = GEN_INT (disp);
3486       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3488       emit_move_insn (tmpreg,
3489 		      gen_rtx_PLUS (Pmode, basereg,
3490 				    gen_rtx_HIGH (Pmode, delta)));
3491       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3492 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3495   if (DO_FRAME_NOTES && note)
3496     RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total stack frame size for the current function.
   SIZE is the size of the function's local variables; FREGS_LIVE, when
   non-null, is presumably set if any FP registers need saving — TODO
   confirm against callers (the assignment is not visible in this view).
   The layout computed here must stay in sync with the code in
   hppa_expand_prologue/hppa_expand_epilogue.  */
3500 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3505   /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3506      be consistent with the rounding and size calculation done here.
3507      Change them at the same time.  */
3509   /* We do our own stack alignment.  First, round the size of the
3510      stack locals up to a word boundary.  */
3511   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3513   /* Space for previous frame pointer + filler.  If any frame is
3514      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3515      waste some space here for the sake of HP compatibility.  The
3516      first slot is only used when the frame pointer is needed.  */
3517   if (size || frame_pointer_needed)
3518     size += STARTING_FRAME_OFFSET;
3520   /* If the current function calls __builtin_eh_return, then we need
3521      to allocate stack space for registers that will hold data for
3522      the exception handler.  */
3523   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3527       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3529       size += i * UNITS_PER_WORD;
3532   /* Account for space used by the callee general register saves.  */
/* %r3 doubles as the frame pointer, so it is excluded from the save
   scan when a frame pointer is needed.  */
3533   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3534     if (df_regs_ever_live_p (i))
3535       size += UNITS_PER_WORD;
3537   /* Account for space used by the callee floating point register saves.  */
3538   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3539     if (df_regs_ever_live_p (i)
3540 	|| (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3544 	/* We always save both halves of the FP register, so always
3545 	   increment the frame size by 8 bytes.  */
3549   /* If any of the floating registers are saved, account for the
3550      alignment needed for the floating point register save block.  */
3553       size = (size + 7) & ~7;
3558   /* The various ABIs include space for the outgoing parameters in the
3559      size of the current function's stack frame.  We don't need to align
3560      for the outgoing arguments as their alignment is set by the final
3561      rounding for the frame as a whole.  */
3562   size += crtl->outgoing_args_size;
3564   /* Allocate space for the fixed frame marker.  This space must be
3565      allocated for any function that makes calls or allocates
3567   if (!current_function_is_leaf || size)
3568     size += TARGET_64BIT ? 48 : 32;
3570   /* Finally, round to the preferred stack boundary.  */
3571   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3572 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3575 /* Generate the assembly code for function entry. FILE is a stdio
3576 stream to output the code to. SIZE is an int: how many units of
3577 temporary storage to allocate.
3579 Refer to the array `regs_ever_live' to determine which registers to
3580 save; `regs_ever_live[I]' is nonzero if register number I is ever
3581 used in the function. This function is responsible for knowing
3582 which registers should not be saved even if used. */
3584 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3585 of memory. If any fpu reg is used in the function, we allocate
3586 such a block here, at the bottom of the frame, just in case it's needed.
3588 If this function is a leaf procedure, then we may choose not
3589 to do a "save" insn. The decision about whether or not
3590 to do this is made in regclass.c. */
/* NOTE(review): this extract prefixes each line with its original file
   line number and elides some lines (declarations, braces, else arms);
   the code text below is preserved verbatim.
   Emit the SOM assembler directives for function entry: the function
   label, .PROC, a .CALLINFO line describing the frame size, call
   behavior (NO_CALLS/CALLS), RP/SP save flags and callee-save register
   counts, and finally .ENTRY.  */
3593 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3595 /* The function's label and associated .PROC must never be
3596 separated and must be output *after* any profiling declarations
3597 to avoid changing spaces/subspaces within a procedure. */
3598 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3599 fputs ("\t.PROC\n", file);
3601 /* hppa_expand_prologue does the dirty work now. We just need
3602 to output the assembler directives which denote the start
3604 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3605 if (current_function_is_leaf)
3606 fputs (",NO_CALLS", file);
3608 fputs (",CALLS", file);
3610 fputs (",SAVE_RP", file);
3612 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3613 at the beginning of the frame and that it is used as the frame
3614 pointer for the frame. We do this because our current frame
3615 layout doesn't conform to that specified in the HP runtime
3616 documentation and we need a way to indicate to programs such as
3617 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3618 isn't used by HP compilers but is supported by the assembler.
3619 However, SAVE_SP is supposed to indicate that the previous stack
3620 pointer has been saved in the frame marker. */
3621 if (frame_pointer_needed)
3622 fputs (",SAVE_SP", file);
3624 /* Pass on information about the number of callee register saves
3625 performed in the prologue.
3627 The compiler is supposed to pass the highest register number
3628 saved, the assembler then has to adjust that number before
3629 entering it into the unwind descriptor (to account for any
3630 caller saved registers with lower register numbers than the
3631 first callee saved register). */
3633 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3636 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3638 fputs ("\n\t.ENTRY\n", file);
3640 remove_useless_addtr_insns (0);
/* NOTE(review): lines are prefixed with original file line numbers and
   some intermediate lines are elided; code text preserved verbatim.
   Emit the RTL prologue: save RP, allocate the stack frame (setting up
   %r3 as frame pointer when needed), save callee-saved general
   registers, save %r19 for 32-bit PIC, and save the callee-saved
   floating point registers through a pointer in %r1.  Frame notes
   (RTX_FRAME_RELATED_P / REG_FRAME_RELATED_EXPR) are attached so
   unwind info can be emitted.  */
3644 hppa_expand_prologue (void)
3646 int merge_sp_adjust_with_store = 0;
3647 HOST_WIDE_INT size = get_frame_size ();
3648 HOST_WIDE_INT offset;
3656 /* Compute total size for frame pointer, filler, locals and rounding to
3657 the next word boundary. Similar code appears in compute_frame_size
3658 and must be changed in tandem with this code. */
3659 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3660 if (local_fsize || frame_pointer_needed)
3661 local_fsize += STARTING_FRAME_OFFSET;
3663 actual_fsize = compute_frame_size (size, &save_fregs);
3665 /* Compute a few things we will use often. */
3666 tmpreg = gen_rtx_REG (word_mode, 1);
3668 /* Save RP first. The calling conventions manual states RP will
3669 always be stored into the caller's frame at sp - 20 or sp - 16
3670 depending on which ABI is in use. */
3671 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3673 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3679 /* Allocate the local frame and set up the frame pointer if needed. */
3680 if (actual_fsize != 0)
3682 if (frame_pointer_needed)
3684 /* Copy the old frame pointer temporarily into %r1. Set up the
3685 new stack pointer, then store away the saved old frame pointer
3686 into the stack at sp and at the same time update the stack
3687 pointer by actual_fsize bytes. Two versions, first
3688 handles small (<8k) frames. The second handles large (>=8k)
3690 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3692 RTX_FRAME_RELATED_P (insn) = 1;
3694 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3696 RTX_FRAME_RELATED_P (insn) = 1;
3698 if (VAL_14_BITS_P (actual_fsize))
3699 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3702 /* It is incorrect to store the saved frame pointer at *sp,
3703 then increment sp (writes beyond the current stack boundary).
3705 So instead use stwm to store at *sp and post-increment the
3706 stack pointer as an atomic operation. Then increment sp to
3707 finish allocating the new frame. */
3708 HOST_WIDE_INT adjust1 = 8192 - 64;
3709 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3711 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3712 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3716 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3717 we need to store the previous stack pointer (frame pointer)
3718 into the frame marker on targets that use the HP unwind
3719 library. This allows the HP unwind library to be used to
3720 unwind GCC frames. However, we are not fully compatible
3721 with the HP library because our frame layout differs from
3722 that specified in the HP runtime specification.
3724 We don't want a frame note on this instruction as the frame
3725 marker moves during dynamic stack allocation.
3727 This instruction also serves as a blockage to prevent
3728 register spills from being scheduled before the stack
3729 pointer is raised. This is necessary as we store
3730 registers using the frame pointer as a base register,
3731 and the frame pointer is set before sp is raised. */
3732 if (TARGET_HPUX_UNWIND_LIBRARY)
3734 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3735 GEN_INT (TARGET_64BIT ? -8 : -4));
3737 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3741 emit_insn (gen_blockage ());
3743 /* no frame pointer needed. */
3746 /* In some cases we can perform the first callee register save
3747 and allocating the stack frame at the same time. If so, just
3748 make a note of it and defer allocating the frame until saving
3749 the callee registers. */
3750 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3751 merge_sp_adjust_with_store = 1;
3752 /* Can not optimize. Adjust the stack frame by actual_fsize
3755 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3760 /* Normal register save.
3762 Do not save the frame pointer in the frame_pointer_needed case. It
3763 was done earlier. */
3764 if (frame_pointer_needed)
3766 offset = local_fsize;
3768 /* Saving the EH return data registers in the frame is the simplest
3769 way to get the frame unwind information emitted. We put them
3770 just before the general registers. */
3771 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3773 unsigned int i, regno;
3777 regno = EH_RETURN_DATA_REGNO (i);
3778 if (regno == INVALID_REGNUM)
3781 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3782 offset += UNITS_PER_WORD;
3786 for (i = 18; i >= 4; i--)
3787 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3789 store_reg (i, offset, FRAME_POINTER_REGNUM);
3790 offset += UNITS_PER_WORD;
3793 /* Account for %r3 which is saved in a special place. */
3796 /* No frame pointer needed. */
3799 offset = local_fsize - actual_fsize;
3801 /* Saving the EH return data registers in the frame is the simplest
3802 way to get the frame unwind information emitted. */
3803 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3805 unsigned int i, regno;
3809 regno = EH_RETURN_DATA_REGNO (i);
3810 if (regno == INVALID_REGNUM)
3813 /* If merge_sp_adjust_with_store is nonzero, then we can
3814 optimize the first save. */
3815 if (merge_sp_adjust_with_store)
3817 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3818 merge_sp_adjust_with_store = 0;
3821 store_reg (regno, offset, STACK_POINTER_REGNUM);
3822 offset += UNITS_PER_WORD;
3826 for (i = 18; i >= 3; i--)
3827 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3829 /* If merge_sp_adjust_with_store is nonzero, then we can
3830 optimize the first GR save. */
3831 if (merge_sp_adjust_with_store)
3833 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3834 merge_sp_adjust_with_store = 0;
3837 store_reg (i, offset, STACK_POINTER_REGNUM);
3838 offset += UNITS_PER_WORD;
3842 /* If we wanted to merge the SP adjustment with a GR save, but we never
3843 did any GR saves, then just emit the adjustment here. */
3844 if (merge_sp_adjust_with_store)
3845 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3849 /* The hppa calling conventions say that %r19, the pic offset
3850 register, is saved at sp - 32 (in this function's frame)
3851 when generating PIC code. FIXME: What is the correct thing
3852 to do for functions which make no calls and allocate no
3853 frame? Do we need to allocate a frame, or can we just omit
3854 the save? For now we'll just omit the save.
3856 We don't want a note on this insn as the frame marker can
3857 move if there is a dynamic stack allocation. */
3858 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3860 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3862 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3866 /* Align pointer properly (doubleword boundary). */
3867 offset = (offset + 7) & ~7;
3869 /* Floating point register store. */
3874 /* First get the frame or stack pointer to the start of the FP register
3876 if (frame_pointer_needed)
3878 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3879 base = frame_pointer_rtx;
3883 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3884 base = stack_pointer_rtx;
3887 /* Now actually save the FP registers. */
3888 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3890 if (df_regs_ever_live_p (i)
3891 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3893 rtx addr, insn, reg;
3894 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3895 reg = gen_rtx_REG (DFmode, i);
3896 insn = emit_move_insn (addr, reg);
3899 RTX_FRAME_RELATED_P (insn) = 1;
3902 rtx mem = gen_rtx_MEM (DFmode,
3903 plus_constant (base, offset));
3904 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3905 gen_rtx_SET (VOIDmode, mem, reg));
3909 rtx meml = gen_rtx_MEM (SFmode,
3910 plus_constant (base, offset));
3911 rtx memr = gen_rtx_MEM (SFmode,
3912 plus_constant (base, offset + 4));
3913 rtx regl = gen_rtx_REG (SFmode, i);
3914 rtx regr = gen_rtx_REG (SFmode, i + 1);
3915 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3916 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3919 RTX_FRAME_RELATED_P (setl) = 1;
3920 RTX_FRAME_RELATED_P (setr) = 1;
3921 vec = gen_rtvec (2, setl, setr);
3922 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3923 gen_rtx_SEQUENCE (VOIDmode, vec));
3926 offset += GET_MODE_SIZE (DFmode);
3933 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3934 Handle case where DISP > 8k by using the add_high_const patterns. */
/* NOTE(review): elided lines (return type, braces, else) are not shown;
   code text preserved verbatim.  The three visible arms handle: a
   14-bit displacement directly; a 64-bit displacement that does not
   fit in 32 bits via a scratch move into %r1 (with or without indexed
   addressing); otherwise a HIGH/LO_SUM pair through %r1.  */
3937 load_reg (int reg, HOST_WIDE_INT disp, int base)
3939 rtx dest = gen_rtx_REG (word_mode, reg);
3940 rtx basereg = gen_rtx_REG (Pmode, base);
3943 if (VAL_14_BITS_P (disp))
3944 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3945 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3947 rtx delta = GEN_INT (disp);
3948 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3950 emit_move_insn (tmpreg, delta);
3951 if (TARGET_DISABLE_INDEXING)
3953 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3954 src = gen_rtx_MEM (word_mode, tmpreg);
3957 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3961 rtx delta = GEN_INT (disp);
3962 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3963 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3965 emit_move_insn (tmpreg, high);
3966 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3969 emit_move_insn (dest, src);
3972 /* Update the total code bytes output to the text section. */
/* NOTE(review): elided lines not shown; code text preserved verbatim.
   Adds NBYTES to the running total, saturating at UINT_MAX on
   unsigned wraparound.  Skipped for functions placed in a named
   section on SOM/GAS configurations.  */
3975 update_total_code_bytes (unsigned int nbytes)
3977 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3978 && !IN_NAMED_SECTION_P (cfun->decl))
3980 unsigned int old_total = total_code_bytes;
3982 total_code_bytes += nbytes;
3984 /* Be prepared to handle overflows. */
3985 if (old_total > total_code_bytes)
3986 total_code_bytes = UINT_MAX;
3990 /* This function generates the assembly code for function exit.
3991 Args are as for output_function_prologue ().
3993 The function epilogue should not depend on the current stack
3994 pointer! It should use the frame pointer only. This is mandatory
3995 because of alloca; we also take advantage of it to omit stack
3996 adjustments before returning. */
/* NOTE(review): elided lines not shown; code text preserved verbatim.
   Emits .EXIT/.PROCEND (plus a nop when the last real insn is a call),
   then updates last_address and the total code-byte count.  */
3999 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4001 rtx insn = get_last_insn ();
4005 /* hppa_expand_epilogue does the dirty work now. We just need
4006 to output the assembler directives which denote the end
4009 To make debuggers happy, emit a nop if the epilogue was completely
4010 eliminated due to a volatile call as the last insn in the
4011 current function. That way the return address (in %r2) will
4012 always point to a valid instruction in the current function. */
4014 /* Get the last real insn. */
4015 if (GET_CODE (insn) == NOTE)
4016 insn = prev_real_insn (insn);
4018 /* If it is a sequence, then look inside. */
4019 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4020 insn = XVECEXP (PATTERN (insn), 0, 0);
4022 /* If insn is a CALL_INSN, then it must be a call to a volatile
4023 function (otherwise there would be epilogue insns). */
4024 if (insn && GET_CODE (insn) == CALL_INSN)
4026 fputs ("\tnop\n", file);
4030 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4032 if (TARGET_SOM && TARGET_GAS)
4034 /* We are done with this subspace except possibly for some additional
4035 debug information. Forget that we are in this subspace to ensure
4036 that the next function is output in its own subspace. */
4038 cfun->machine->in_nsubspa = 2;
4041 if (INSN_ADDRESSES_SET_P ())
4043 insn = get_last_nonnote_insn ();
4044 last_address += INSN_ADDRESSES (INSN_UID (insn));
4046 last_address += insn_default_length (insn);
4047 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4048 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4051 last_address = UINT_MAX;
4053 /* Finally, update the total number of code bytes output so far. */
4054 update_total_code_bytes (last_address);
/* NOTE(review): lines carry original file line numbers and some lines
   are elided; code text preserved verbatim.
   Emit the RTL epilogue: restore RP early when possible, restore
   callee-saved general registers (optionally merging the stack
   adjustment with the first load), restore the floating point
   registers through %r1, emit a blockage, cut the stack back, and
   apply the EH return stack adjustment when needed.  Mirrors
   hppa_expand_prologue.  */
4058 hppa_expand_epilogue (void)
4061 HOST_WIDE_INT offset;
4062 HOST_WIDE_INT ret_off = 0;
4064 int merge_sp_adjust_with_load = 0;
4066 /* We will use this often. */
4067 tmpreg = gen_rtx_REG (word_mode, 1);
4069 /* Try to restore RP early to avoid load/use interlocks when
4070 RP gets used in the return (bv) instruction. This appears to still
4071 be necessary even when we schedule the prologue and epilogue. */
4074 ret_off = TARGET_64BIT ? -16 : -20;
4075 if (frame_pointer_needed)
4077 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4082 /* No frame pointer, and stack is smaller than 8k. */
4083 if (VAL_14_BITS_P (ret_off - actual_fsize))
4085 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4091 /* General register restores. */
4092 if (frame_pointer_needed)
4094 offset = local_fsize;
4096 /* If the current function calls __builtin_eh_return, then we need
4097 to restore the saved EH data registers. */
4098 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4100 unsigned int i, regno;
4104 regno = EH_RETURN_DATA_REGNO (i);
4105 if (regno == INVALID_REGNUM)
4108 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4109 offset += UNITS_PER_WORD;
4113 for (i = 18; i >= 4; i--)
4114 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4116 load_reg (i, offset, FRAME_POINTER_REGNUM);
4117 offset += UNITS_PER_WORD;
4122 offset = local_fsize - actual_fsize;
4124 /* If the current function calls __builtin_eh_return, then we need
4125 to restore the saved EH data registers. */
4126 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4128 unsigned int i, regno;
4132 regno = EH_RETURN_DATA_REGNO (i);
4133 if (regno == INVALID_REGNUM)
4136 /* Only for the first load.
4137 merge_sp_adjust_with_load holds the register load
4138 with which we will merge the sp adjustment. */
4139 if (merge_sp_adjust_with_load == 0
4141 && VAL_14_BITS_P (-actual_fsize))
4142 merge_sp_adjust_with_load = regno;
4144 load_reg (regno, offset, STACK_POINTER_REGNUM);
4145 offset += UNITS_PER_WORD;
4149 for (i = 18; i >= 3; i--)
4151 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4153 /* Only for the first load.
4154 merge_sp_adjust_with_load holds the register load
4155 with which we will merge the sp adjustment. */
4156 if (merge_sp_adjust_with_load == 0
4158 && VAL_14_BITS_P (-actual_fsize))
4159 merge_sp_adjust_with_load = i;
4161 load_reg (i, offset, STACK_POINTER_REGNUM);
4162 offset += UNITS_PER_WORD;
4167 /* Align pointer properly (doubleword boundary). */
4168 offset = (offset + 7) & ~7;
4170 /* FP register restores. */
4173 /* Adjust the register to index off of. */
4174 if (frame_pointer_needed)
4175 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4177 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4179 /* Actually do the restores now. */
4180 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4181 if (df_regs_ever_live_p (i)
4182 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4184 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4185 rtx dest = gen_rtx_REG (DFmode, i);
4186 emit_move_insn (dest, src);
4190 /* Emit a blockage insn here to keep these insns from being moved to
4191 an earlier spot in the epilogue, or into the main instruction stream.
4193 This is necessary as we must not cut the stack back before all the
4194 restores are finished. */
4195 emit_insn (gen_blockage ());
4197 /* Reset stack pointer (and possibly frame pointer). The stack
4198 pointer is initially set to fp + 64 to avoid a race condition. */
4199 if (frame_pointer_needed)
4201 rtx delta = GEN_INT (-64);
4203 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4204 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4206 /* If we were deferring a callee register restore, do it now. */
4207 else if (merge_sp_adjust_with_load)
4209 rtx delta = GEN_INT (-actual_fsize);
4210 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4212 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4214 else if (actual_fsize != 0)
4215 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4218 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4219 frame greater than 8k), do so now. */
4221 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4223 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4225 rtx sa = EH_RETURN_STACKADJ_RTX;
4227 emit_insn (gen_blockage ());
4228 emit_insn (TARGET_64BIT
4229 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4230 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return an rtx holding the function-entry value of the PIC offset
   table register (%r19).  NOTE(review): elided surrounding lines not
   shown; code text preserved verbatim.  */
4235 hppa_pic_save_rtx (void)
4237 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4240 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4241 #define NO_DEFERRED_PROFILE_COUNTERS 0
4245 /* Vector of funcdef numbers. */
4246 static VEC(int,heap) *funcdef_nos;
4248 /* Output deferred profile counters. */
/* NOTE(review): elided lines not shown; code text preserved verbatim.
   Emits one zero-initialized, long-sized "LP" counter in the data
   section for each funcdef number queued by hppa_profile_hook, then
   frees the queue.  */
4250 output_deferred_profile_counters (void)
4255 if (VEC_empty (int, funcdef_nos))
4258 switch_to_section (data_section);
4259 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4260 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4262 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4264 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4265 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4268 VEC_free (int, heap, funcdef_nos);
/* NOTE(review): elided lines not shown; code text preserved verbatim.
   Emit the _mcount profiling call for function LABEL_NO: save the
   return pointer into %r26, load the function address into %r25 via
   the lcla/load_offset_label_address sequence, optionally pass a
   deferred "LP" counter address in %r24, and mark the call as
   non-throwing with a REG_EH_REGION note.  */
4272 hppa_profile_hook (int label_no)
4274 /* We use SImode for the address of the function in both 32 and
4275 64-bit code to avoid having to provide DImode versions of the
4276 lcla2 and load_offset_label_address insn patterns. */
4277 rtx reg = gen_reg_rtx (SImode);
4278 rtx label_rtx = gen_label_rtx ();
4279 rtx begin_label_rtx, call_insn;
4280 char begin_label_name[16];
4282 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4284 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4287 emit_move_insn (arg_pointer_rtx,
4288 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4291 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4293 /* The address of the function is loaded into %r25 with an instruction-
4294 relative sequence that avoids the use of relocations. The sequence
4295 is split so that the load_offset_label_address instruction can
4296 occupy the delay slot of the call to _mcount. */
4298 emit_insn (gen_lcla2 (reg, label_rtx));
4300 emit_insn (gen_lcla1 (reg, label_rtx));
4302 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4303 reg, begin_label_rtx, label_rtx));
4305 #if !NO_DEFERRED_PROFILE_COUNTERS
4307 rtx count_label_rtx, addr, r24;
4308 char count_label_name[16];
4310 VEC_safe_push (int, heap, funcdef_nos, label_no);
4311 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4312 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4314 addr = force_reg (Pmode, count_label_rtx);
4315 r24 = gen_rtx_REG (Pmode, 24);
4316 emit_move_insn (r24, addr);
4319 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4320 gen_rtx_SYMBOL_REF (Pmode,
4322 GEN_INT (TARGET_64BIT ? 24 : 12)));
4324 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4329 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4330 gen_rtx_SYMBOL_REF (Pmode,
4332 GEN_INT (TARGET_64BIT ? 16 : 8)));
4336 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4337 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4339 /* Indicate the _mcount call cannot throw, nor will it execute a
4341 add_reg_note (call_insn, REG_EH_REGION, constm1_rtx);
4344 /* Fetch the return address for the frame COUNT steps up from
4345 the current frame, after the prologue. FRAMEADDR is the
4346 frame pointer of the COUNT frame.
4348 We want to ignore any export stub remnants here. To handle this,
4349 we examine the code at the return address, and if it is an export
4350 stub, we return a memory rtx for the stub return address stored
4353 The value returned is used in two different ways:
4355 1. To find a function's caller.
4357 2. To change the return address for a function.
4359 This function handles most instances of case 1; however, it will
4360 fail if there are two levels of stubs to execute on the return
4361 path. The only way I believe that can happen is if the return value
4362 needs a parameter relocation, which never happens for C code.
4364 This function handles most instances of case 2; however, it will
4365 fail if we did not originally have stub code on the return path
4366 but will need stub code on the new return path. This can happen if
4367 the caller & callee are both in the main program, but the new
4368 return location is in a shared library. */
/* NOTE(review): elided lines (including this function's tail) are not
   shown; code text preserved verbatim.  */
4371 return_addr_rtx (int count, rtx frameaddr)
4378 /* Instruction stream at the normal return address for the export stub:
4380 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4381 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4382 0x00011820 | stub+16: mtsp r1,sr0
4383 0xe0400002 | stub+20: be,n 0(sr0,rp)
4385 0xe0400002 must be specified as -532676606 so that it won't be
4386 rejected as an invalid immediate operand on 64-bit hosts. */
4388 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4394 rp = get_hard_reg_initial_val (Pmode, 2);
4396 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4399 /* If there is no export stub then just use the value saved from
4400 the return pointer register. */
4402 saved_rp = gen_reg_rtx (Pmode);
4403 emit_move_insn (saved_rp, rp);
4405 /* Get pointer to the instruction stream. We have to mask out the
4406 privilege level from the two low order bits of the return address
4407 pointer here so that ins will point to the start of the first
4408 instruction that would have been executed if we returned. */
4409 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4410 label = gen_label_rtx ();
4412 /* Check the instruction stream at the normal return address for the
4413 export stub. If it is an export stub, then our return address is
4414 really in -24[frameaddr]. */
4416 for (i = 0; i < 3; i++)
4418 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4419 rtx op1 = GEN_INT (insns[i]);
4420 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4423 /* Here we know that our return address points to an export
4424 stub. We don't want to return the address of the export stub,
4425 but rather the return address of the export stub. That return
4426 address is stored at -24[frameaddr]. */
4428 emit_move_insn (saved_rp,
4430 memory_address (Pmode,
4431 plus_constant (frameaddr,
/* Emit a floating point comparison into CCFP register 0 followed by a
   conditional branch to operands[3] on the comparison code in
   operands[0].  NOTE(review): elided lines not shown; code text
   preserved verbatim.  */
4440 emit_bcond_fp (rtx operands[])
4442 enum rtx_code code = GET_CODE (operands[0]);
4443 rtx operand0 = operands[1];
4444 rtx operand1 = operands[2];
4445 rtx label = operands[3];
4447 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4448 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4450 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4451 gen_rtx_IF_THEN_ELSE (VOIDmode,
4454 gen_rtx_REG (CCFPmode, 0),
4456 gen_rtx_LABEL_REF (VOIDmode, label),
4461 /* Adjust the cost of a scheduling dependency. Return the new cost of
4462 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* NOTE(review): elided lines (braces, default returns, some case
   labels) are not shown; code text preserved verbatim.  The visible
   structure penalizes anti and output dependencies between FP
   arithmetic and fploads/fpalu insns; PA8000 and true dependencies
   are handled by bypasses instead.  */
4465 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4467 enum attr_type attr_type;
4469 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4470 true dependencies as they are described with bypasses now. */
4471 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4474 if (! recog_memoized (insn))
4477 attr_type = get_attr_type (insn);
4479 switch (REG_NOTE_KIND (link))
4482 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4485 if (attr_type == TYPE_FPLOAD)
4487 rtx pat = PATTERN (insn);
4488 rtx dep_pat = PATTERN (dep_insn);
4489 if (GET_CODE (pat) == PARALLEL)
4491 /* This happens for the fldXs,mb patterns. */
4492 pat = XVECEXP (pat, 0, 0);
4494 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4495 /* If this happens, we have to extend this to schedule
4496 optimally. Return 0 for now. */
4499 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4501 if (! recog_memoized (dep_insn))
4503 switch (get_attr_type (dep_insn))
4510 case TYPE_FPSQRTSGL:
4511 case TYPE_FPSQRTDBL:
4512 /* A fpload can't be issued until one cycle before a
4513 preceding arithmetic operation has finished if
4514 the target of the fpload is any of the sources
4515 (or destination) of the arithmetic operation. */
4516 return insn_default_latency (dep_insn) - 1;
4523 else if (attr_type == TYPE_FPALU)
4525 rtx pat = PATTERN (insn);
4526 rtx dep_pat = PATTERN (dep_insn);
4527 if (GET_CODE (pat) == PARALLEL)
4529 /* This happens for the fldXs,mb patterns. */
4530 pat = XVECEXP (pat, 0, 0);
4532 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4533 /* If this happens, we have to extend this to schedule
4534 optimally. Return 0 for now. */
4537 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4539 if (! recog_memoized (dep_insn))
4541 switch (get_attr_type (dep_insn))
4545 case TYPE_FPSQRTSGL:
4546 case TYPE_FPSQRTDBL:
4547 /* An ALU flop can't be issued until two cycles before a
4548 preceding divide or sqrt operation has finished if
4549 the target of the ALU flop is any of the sources
4550 (or destination) of the divide or sqrt operation. */
4551 return insn_default_latency (dep_insn) - 2;
4559 /* For other anti dependencies, the cost is 0. */
4562 case REG_DEP_OUTPUT:
4563 /* Output dependency; DEP_INSN writes a register that INSN writes some
4565 if (attr_type == TYPE_FPLOAD)
4567 rtx pat = PATTERN (insn);
4568 rtx dep_pat = PATTERN (dep_insn);
4569 if (GET_CODE (pat) == PARALLEL)
4571 /* This happens for the fldXs,mb patterns. */
4572 pat = XVECEXP (pat, 0, 0);
4574 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4575 /* If this happens, we have to extend this to schedule
4576 optimally. Return 0 for now. */
4579 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4581 if (! recog_memoized (dep_insn))
4583 switch (get_attr_type (dep_insn))
4590 case TYPE_FPSQRTSGL:
4591 case TYPE_FPSQRTDBL:
4592 /* A fpload can't be issued until one cycle before a
4593 preceding arithmetic operation has finished if
4594 the target of the fpload is the destination of the
4595 arithmetic operation.
4597 Exception: For PA7100LC, PA7200 and PA7300, the cost
4598 is 3 cycles, unless they bundle together. We also
4599 pay the penalty if the second insn is a fpload. */
4600 return insn_default_latency (dep_insn) - 1;
4607 else if (attr_type == TYPE_FPALU)
4609 rtx pat = PATTERN (insn);
4610 rtx dep_pat = PATTERN (dep_insn);
4611 if (GET_CODE (pat) == PARALLEL)
4613 /* This happens for the fldXs,mb patterns. */
4614 pat = XVECEXP (pat, 0, 0);
4616 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4617 /* If this happens, we have to extend this to schedule
4618 optimally. Return 0 for now. */
4621 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4623 if (! recog_memoized (dep_insn))
4625 switch (get_attr_type (dep_insn))
4629 case TYPE_FPSQRTSGL:
4630 case TYPE_FPSQRTDBL:
4631 /* An ALU flop can't be issued until two cycles before a
4632 preceding divide or sqrt operation has finished if
4633 the target of the ALU flop is also the target of
4634 the divide or sqrt operation. */
4635 return insn_default_latency (dep_insn) - 2;
4643 /* For other output dependencies, the cost is 0. */
4651 /* Adjust scheduling priorities. We use this to try and keep addil
4652 and the next use of %r1 close together. */
/* NOTE(review): elided lines (the priority adjustments taken in each
   arm and the final return) are not shown; code text preserved
   verbatim.  The three arms match LO_SUM uses of non-read-only
   symbolic operands: directly in SRC, in a MEM source address, and
   in a MEM destination address.  */
4654 pa_adjust_priority (rtx insn, int priority)
4656 rtx set = single_set (insn);
4660 src = SET_SRC (set);
4661 dest = SET_DEST (set);
4662 if (GET_CODE (src) == LO_SUM
4663 && symbolic_operand (XEXP (src, 1), VOIDmode)
4664 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4667 else if (GET_CODE (src) == MEM
4668 && GET_CODE (XEXP (src, 0)) == LO_SUM
4669 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4670 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4673 else if (GET_CODE (dest) == MEM
4674 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4675 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4676 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4682 /* The 700 can only issue a single insn at a time.
4683 The 7XXX processors can issue two insns at a time.
4684 The 8000 can issue 4 insns at a time. */
/* NOTE(review): elided lines (switch header on pa_cpu, default case,
   braces) are not shown; code text preserved verbatim.  */
4686 pa_issue_rate (void)
4690 case PROCESSOR_700: return 1;
4691 case PROCESSOR_7100: return 2;
4692 case PROCESSOR_7100LC: return 2;
4693 case PROCESSOR_7200: return 2;
4694 case PROCESSOR_7300: return 2;
4695 case PROCESSOR_8000: return 4;
4704 /* Return any length adjustment needed by INSN which already has its length
4705 computed as LENGTH. Return zero if no adjustment is necessary.
4707 For the PA: function calls, millicode calls, and backwards short
4708 conditional branches with unfilled delay slots need an adjustment by +1
4709 (to account for the NOP which will be inserted into the instruction stream).
4711 Also compute the length of an inline block move here as it is too
4712 complicated to express as a length attribute in pa.md. */
/* NOTE(review): elided lines (the return values of several arms and
   some sub-conditions) are not shown; code text preserved verbatim.  */
4714 pa_adjust_insn_length (rtx insn, int length)
4716 rtx pat = PATTERN (insn);
4718 /* Jumps inside switch tables which have unfilled delay slots need
4720 if (GET_CODE (insn) == JUMP_INSN
4721 && GET_CODE (pat) == PARALLEL
4722 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4724 /* Millicode insn with an unfilled delay slot. */
4725 else if (GET_CODE (insn) == INSN
4726 && GET_CODE (pat) != SEQUENCE
4727 && GET_CODE (pat) != USE
4728 && GET_CODE (pat) != CLOBBER
4729 && get_attr_type (insn) == TYPE_MILLI)
4731 /* Block move pattern. */
4732 else if (GET_CODE (insn) == INSN
4733 && GET_CODE (pat) == PARALLEL
4734 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4735 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4736 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4737 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4738 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4739 return compute_movmem_length (insn) - 4;
4740 /* Block clear pattern. */
4741 else if (GET_CODE (insn) == INSN
4742 && GET_CODE (pat) == PARALLEL
4743 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4744 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4745 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4746 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4747 return compute_clrmem_length (insn) - 4;
4748 /* Conditional branch with an unfilled delay slot. */
4749 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4751 /* Adjust a short backwards conditional with an unfilled delay slot. */
4752 if (GET_CODE (pat) == SET
4754 && JUMP_LABEL (insn) != NULL_RTX
4755 && ! forward_branch_p (insn))
4757 else if (GET_CODE (pat) == PARALLEL
4758 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4761 /* Adjust dbra insn with short backwards conditional branch with
4762 unfilled delay slot -- only for case where counter is in a
4763 general register. */
4764 else if (GET_CODE (pat) == PARALLEL
4765 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4766 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4767 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4769 && ! forward_branch_p (insn))
4777 /* Print operand X (an rtx) in assembler syntax to file FILE.
4778 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4779 For `%' followed by punctuation, CODE is the punctuation and X is null. */
/* Print operand X in assembler syntax to FILE, modified by CODE
   (the %-letter).  The condition codes C/N/S/B print a comparison
   mnemonic in plain, negated, swapped, or swapped-and-negated form;
   other letters format constants, register pairs, completers, etc.
   NOTE(review): the switch labels for each %-letter are elided from
   this listing — each group of fputs calls below belongs to the
   case named in its comment.  */
4782 print_operand (FILE *file, rtx x, int code)
4787 /* Output a 'nop' if there's nothing for the delay slot. */
4788 if (dbr_sequence_length () == 0)
4789 fputs ("\n\tnop", file);
4792 /* Output a nullification completer if there's nothing for the
4793 delay slot or nullification is requested. */
4794 if (dbr_sequence_length () == 0 ||
4796 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4800 /* Print out the second register name of a register pair.
4801 I.e., R (6) => 7. */
4802 fputs (reg_names[REGNO (x) + 1], file);
4805 /* A register or zero. */
4807 || (x == CONST0_RTX (DFmode))
4808 || (x == CONST0_RTX (SFmode)))
4810 fputs ("%r0", file);
4816 /* A register or zero (floating point). */
4818 || (x == CONST0_RTX (DFmode))
4819 || (x == CONST0_RTX (SFmode)))
4821 fputs ("%fr0", file);
/* Print a global address of the form "symbol(reg)".  */
4830 xoperands[0] = XEXP (XEXP (x, 0), 0);
4831 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4832 output_global_address (file, xoperands[1], 0);
4833 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4837 case 'C': /* Plain (C)ondition */
4839 switch (GET_CODE (x))
4842 fputs ("=", file); break;
4844 fputs ("<>", file); break;
4846 fputs (">", file); break;
4848 fputs (">=", file); break;
4850 fputs (">>=", file); break;
4852 fputs (">>", file); break;
4854 fputs ("<", file); break;
4856 fputs ("<=", file); break;
4858 fputs ("<<=", file); break;
4860 fputs ("<<", file); break;
4865 case 'N': /* Condition, (N)egated */
4866 switch (GET_CODE (x))
4869 fputs ("<>", file); break;
4871 fputs ("=", file); break;
4873 fputs ("<=", file); break;
4875 fputs ("<", file); break;
4877 fputs ("<<", file); break;
4879 fputs ("<<=", file); break;
4881 fputs (">=", file); break;
4883 fputs (">", file); break;
4885 fputs (">>", file); break;
4887 fputs (">>=", file); break;
4892 /* For floating point comparisons. Note that the output
4893 predicates are the complement of the desired mode. The
4894 conditions for GT, GE, LT, LE and LTGT cause an invalid
4895 operation exception if the result is unordered and this
4896 exception is enabled in the floating-point status register. */
4898 switch (GET_CODE (x))
4901 fputs ("!=", file); break;
4903 fputs ("=", file); break;
4905 fputs ("!>", file); break;
4907 fputs ("!>=", file); break;
4909 fputs ("!<", file); break;
4911 fputs ("!<=", file); break;
4913 fputs ("!<>", file); break;
4915 fputs ("!?<=", file); break;
4917 fputs ("!?<", file); break;
4919 fputs ("!?>=", file); break;
4921 fputs ("!?>", file); break;
4923 fputs ("!?=", file); break;
4925 fputs ("!?", file); break;
4927 fputs ("?", file); break;
4932 case 'S': /* Condition, operands are (S)wapped. */
4933 switch (GET_CODE (x))
4936 fputs ("=", file); break;
4938 fputs ("<>", file); break;
4940 fputs ("<", file); break;
4942 fputs ("<=", file); break;
4944 fputs ("<<=", file); break;
4946 fputs ("<<", file); break;
4948 fputs (">", file); break;
4950 fputs (">=", file); break;
4952 fputs (">>=", file); break;
4954 fputs (">>", file); break;
4959 case 'B': /* Condition, (B)oth swapped and negate. */
4960 switch (GET_CODE (x))
4963 fputs ("<>", file); break;
4965 fputs ("=", file); break;
4967 fputs (">=", file); break;
4969 fputs (">", file); break;
4971 fputs (">>", file); break;
4973 fputs (">>=", file); break;
4975 fputs ("<=", file); break;
4977 fputs ("<", file); break;
4979 fputs ("<<", file); break;
4981 fputs ("<<=", file); break;
/* Constant-formatting codes: bitwise complement, 64/32-bit shift
   complements, log2, and 63-N/31-N bit positions.  */
4987 gcc_assert (GET_CODE (x) == CONST_INT);
4988 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4991 gcc_assert (GET_CODE (x) == CONST_INT);
4992 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4995 gcc_assert (GET_CODE (x) == CONST_INT);
4996 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4999 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5000 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5003 gcc_assert (GET_CODE (x) == CONST_INT);
5004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5007 gcc_assert (GET_CODE (x) == CONST_INT);
5008 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
/* Addressing-mode completers (,mb ,ma x x,s) selected by the form of
   the memory address; the leading "s" differs per assembler dialect.  */
5011 if (GET_CODE (x) == CONST_INT)
5016 switch (GET_CODE (XEXP (x, 0)))
5020 if (ASSEMBLER_DIALECT == 0)
5021 fputs ("s,mb", file);
5023 fputs (",mb", file);
5027 if (ASSEMBLER_DIALECT == 0)
5028 fputs ("s,ma", file);
5030 fputs (",ma", file);
5033 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5034 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5036 if (ASSEMBLER_DIALECT == 0)
5039 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5040 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5042 if (ASSEMBLER_DIALECT == 0)
5043 fputs ("x,s", file);
5047 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5051 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5057 output_global_address (file, x, 0);
5060 output_global_address (file, x, 1);
5062 case 0: /* Don't do anything special */
/* zdep operand triples for 32-bit and 64-bit immediates.  */
5067 compute_zdepwi_operands (INTVAL (x), op);
5068 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5074 compute_zdepdi_operands (INTVAL (x), op);
5075 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5079 /* We can get here from a .vtable_inherit due to our
5080 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Default operand output: registers by name, MEMs by address form.  */
5086 if (GET_CODE (x) == REG)
5088 fputs (reg_names [REGNO (x)], file);
5089 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5095 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5096 && (REGNO (x) & 1) == 0)
5099 else if (GET_CODE (x) == MEM)
5101 int size = GET_MODE_SIZE (GET_MODE (x));
5102 rtx base = NULL_RTX;
5103 switch (GET_CODE (XEXP (x, 0)))
/* Pre-decrement: "-size(base)".  */
5107 base = XEXP (XEXP (x, 0), 0);
5108 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
/* Post-increment: "size(base)".  */
5112 base = XEXP (XEXP (x, 0), 0);
5113 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5116 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5117 fprintf (file, "%s(%s)",
5118 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5119 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5120 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5121 fprintf (file, "%s(%s)",
5122 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5123 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5124 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5125 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5127 /* Because the REG_POINTER flag can get lost during reload,
5128 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5129 index and base registers in the combined move patterns. */
5130 rtx base = XEXP (XEXP (x, 0), 1);
5131 rtx index = XEXP (XEXP (x, 0), 0);
5133 fprintf (file, "%s(%s)",
5134 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5137 output_address (XEXP (x, 0));
5140 output_address (XEXP (x, 0));
5145 output_addr_const (file, x);
5148 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
/* Output X, a SYMBOL_REF or a CONST (symbol plus offset), to FILE.
   Non-PIC writable symbols get a "-$global$" suffix.  When
   ROUND_CONSTANT is set, the offset is rounded for an LR field
   selector to keep HP's optimizing linker from mis-relaxing addil
   sequences (see comment below).  */
5151 output_global_address (FILE *file, rtx x, int round_constant)
5154 /* Imagine (high (const (plus ...))). */
5155 if (GET_CODE (x) == HIGH)
5158 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5159 output_addr_const (file, x);
5160 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5162 output_addr_const (file, x);
5163 fputs ("-$global$", file);
5165 else if (GET_CODE (x) == CONST)
5167 const char *sep = "";
5168 int offset = 0; /* assembler wants -$global$ at end */
5169 rtx base = NULL_RTX;
/* Either operand of the PLUS may hold the symbol; the other holds
   the integer offset.  */
5171 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5174 base = XEXP (XEXP (x, 0), 0);
5175 output_addr_const (file, base);
5178 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5184 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5187 base = XEXP (XEXP (x, 0), 1);
5188 output_addr_const (file, base);
5191 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5197 /* How bogus. The compiler is apparently responsible for
5198 rounding the constant if it uses an LR field selector.
5200 The linker and/or assembler seem a better place since
5201 they have to do this kind of thing already.
5203 If we fail to do this, HP's optimizing linker may eliminate
5204 an addil, but not update the ldw/stw/ldo instruction that
5205 uses the result of the addil. */
5207 offset = ((offset + 0x1000) & ~0x1fff);
5209 switch (GET_CODE (XEXP (x, 0)))
5222 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5230 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5231 fputs ("-$global$", file);
5233 fprintf (file, "%s%d", sep, offset);
5236 output_addr_const (file, x);
5239 /* Output boilerplate text to appear at the beginning of the file.
5240 There are several possible versions. */
5241 #define aputs(x) fputs(x, asm_out_file)
/* Emit the ".LEVEL" architecture directive matching the selected PA
   architecture revision (2.0w / 2.0 / 1.1 / 1.0).  */
5243 pa_file_start_level (void)
5246 aputs ("\t.LEVEL 2.0w\n");
5247 else if (TARGET_PA_20)
5248 aputs ("\t.LEVEL 2.0\n");
5249 else if (TARGET_PA_11)
5250 aputs ("\t.LEVEL 1.1\n");
5252 aputs ("\t.LEVEL 1.0\n");
/* Emit the SOM .SPACE/.SUBSPA boilerplate for the private data, BSS
   and text spaces.  SORTSPACE selects the sorted-subspace variant.  */
5256 pa_file_start_space (int sortspace)
5258 aputs ("\t.SPACE $PRIVATE$");
5261 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5262 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5263 "\n\t.SPACE $TEXT$");
5266 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5267 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
/* Emit the .file directive (when debugging) and, if WANT_VERSION,
   a ".version" directive.  */
5271 pa_file_start_file (int want_version)
5273 if (write_symbols != NO_DEBUG)
5275 output_file_directive (asm_out_file, main_input_filename);
5277 aputs ("\t.version\t\"01.01\"\n");
/* Import the profiling routine _mcount with import kind ASWHAT
   ("ENTRY" or "CODE", depending on object format).  */
5282 pa_file_start_mcount (const char *aswhat)
5285 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
/* TARGET_ASM_FILE_START for generic ELF targets.  */
5289 pa_elf_file_start (void)
5291 pa_file_start_level ();
5292 pa_file_start_mcount ("ENTRY");
5293 pa_file_start_file (0);
/* TARGET_ASM_FILE_START for SOM: level, space directives, and the
   standard $global$/$$dyncall imports.  */
5297 pa_som_file_start (void)
5299 pa_file_start_level ();
5300 pa_file_start_space (0);
5301 aputs ("\t.IMPORT $global$,DATA\n"
5302 "\t.IMPORT $$dyncall,MILLICODE\n");
5303 pa_file_start_mcount ("CODE");
5304 pa_file_start_file (0);
/* TARGET_ASM_FILE_START for Linux.  */
5308 pa_linux_file_start (void)
5310 pa_file_start_file (1);
5311 pa_file_start_level ();
5312 pa_file_start_mcount ("CODE");
/* TARGET_ASM_FILE_START for 64-bit HP-UX with GNU as; declares
   _mcount's type when the directive is available.  */
5316 pa_hpux64_gas_file_start (void)
5318 pa_file_start_level ();
5319 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5321 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5323 pa_file_start_file (1);
/* TARGET_ASM_FILE_START for 64-bit HP-UX with HP's assembler.  */
5327 pa_hpux64_hpas_file_start (void)
5329 pa_file_start_level ();
5330 pa_file_start_space (1);
5331 pa_file_start_mcount ("CODE");
5332 pa_file_start_file (0);
5336 /* Search the deferred plabel list for SYMBOL and return its internal
5337 label. If an entry for SYMBOL is not found, a new entry is created. */
/* Return the internal label for SYMBOL's deferred plabel, creating
   a new entry (and growing the GC'd array) if none exists yet.  */
5340 get_deferred_plabel (rtx symbol)
5342 const char *fname = XSTR (symbol, 0);
5345 /* See if we have already put this function on the list of deferred
5346 plabels. This list is generally small, so a linear search is not
5347 too ugly. If it proves too slow replace it with something faster. */
5348 for (i = 0; i < n_deferred_plabels; i++)
5349 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5352 /* If the deferred plabel list is empty, or this entry was not found
5353 on the list, create a new entry on the list. */
5354 if (deferred_plabels == NULL || i == n_deferred_plabels)
5358 if (deferred_plabels == 0)
5359 deferred_plabels = (struct deferred_plabel *)
5360 ggc_alloc (sizeof (struct deferred_plabel));
5362 deferred_plabels = (struct deferred_plabel *)
5363 ggc_realloc (deferred_plabels,
5364 ((n_deferred_plabels + 1)
5365 * sizeof (struct deferred_plabel)));
5367 i = n_deferred_plabels++;
5368 deferred_plabels[i].internal_label = gen_label_rtx ();
5369 deferred_plabels[i].symbol = symbol;
5371 /* Gross. We have just implicitly taken the address of this
5372 function. Mark it in the same manner as assemble_name. */
5373 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5375 mark_referenced (id);
5378 return deferred_plabels[i].internal_label;
/* Emit all deferred plabels collected by get_deferred_plabel:
   switch to (read-only) data, align, then one label + pointer-sized
   word per entry.  */
5382 output_deferred_plabels (void)
5386 /* If we have some deferred plabels, then we need to switch into the
5387 data or readonly data section, and align it to a 4 byte boundary
5388 before outputting the deferred plabels. */
5389 if (n_deferred_plabels)
5391 switch_to_section (flag_pic ? data_section : readonly_data_section);
5392 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5395 /* Now output the deferred plabels. */
5396 for (i = 0; i < n_deferred_plabels; i++)
5398 targetm.asm_out.internal_label (asm_out_file, "L",
5399 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5400 assemble_integer (deferred_plabels[i].symbol,
5401 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5405 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5406 /* Initialize optabs to point to HPUX long double emulation routines. */
/* Point the TFmode optabs at HP-UX's _U_Qf* quad-precision emulation
   routines (arithmetic, comparisons, and conversions).  Note the
   64-bit signed fix routine uses a double-underscore prefix.  */
5408 pa_hpux_init_libfuncs (void)
5410 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5411 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5412 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5413 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5414 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5415 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5416 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5417 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5418 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5420 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5421 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5422 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5423 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5424 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5425 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5426 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5428 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5429 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5430 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5431 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5433 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5434 ? "__U_Qfcnvfxt_quad_to_sgl"
5435 : "_U_Qfcnvfxt_quad_to_sgl");
5436 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5437 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5438 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5440 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5441 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5442 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5443 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5447 /* HP's millicode routines mean something special to the assembler.
5448 Keep track of which ones we have used. */
/* Bookkeeping for HP millicode imports: the known routines, a
   per-routine "already imported" flag, their names, and a template
   import directive whose name field (at offset MILLI_START) gets
   patched in import_milli.  */
5450 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5451 static void import_milli (enum millicodes);
5452 static char imported[(int) end1000];
5453 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5454 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5455 #define MILLI_START 10
/* Emit an .IMPORT directive for millicode routine CODE, at most once
   per translation unit (tracked in `imported').  The 4-character
   routine name is spliced into the "$$...." slot of import_string.  */
5458 import_milli (enum millicodes code)
5460 char str[sizeof (import_string)];
5462 if (!imported[(int) code])
5464 imported[(int) code] = 1;
5465 strcpy (str, import_string);
5466 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5467 output_asm_insn (str, 0);
5471 /* The register constraints have put the operands and return value in
5472 the proper registers. */
/* Output a call to the $$mulI multiply millicode routine; operands
   and return value are already in the required registers.  */
5475 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5477 import_milli (mulI);
5478 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5481 /* Emit the rtl for doing a division by a constant. */
5483 /* Do magic division millicodes exist for this value? */
/* magic_milli[n] is nonzero when a $$divI_n/$$divU_n millicode
   routine exists for small divisor n (0..15).  */
5484 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5486 /* We'll use an array to keep track of the magic millicodes and
5487 whether or not we've used them already. [n][0] is signed, [n][1] is
5490 static int div_milli[16][2];
/* Emit RTL for a division by the constant operands[2] via a magic
   millicode routine, when one exists for that value.  The PARALLEL
   models the millicode calling convention: result in %r29, argument
   in %r26, with the clobbered registers listed explicitly.  */
5493 emit_hpdiv_const (rtx *operands, int unsignedp)
5495 if (GET_CODE (operands[2]) == CONST_INT
5496 && INTVAL (operands[2]) > 0
5497 && INTVAL (operands[2]) < 16
5498 && magic_milli[INTVAL (operands[2])])
/* Millicode return pointer: %r2 on 64-bit, %r31 on 32-bit.  */
5500 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5502 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5506 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5507 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5509 gen_rtx_REG (SImode, 26),
5511 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5512 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5513 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5514 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5515 gen_rtx_CLOBBER (VOIDmode, ret))));
5516 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Output a division millicode call.  Constant divisors with a magic
   routine use $$divI_n/$$divU_n (importing the routine once); other
   divisors fall back to the generic $$divI/$$divU routines.  */
5523 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5527 /* If the divisor is a constant, try to use one of the special
5529 if (GET_CODE (operands[0]) == CONST_INT
5531 static char buf[100];
5532 divisor = INTVAL (operands[0]);
/* Emit the .IMPORT only the first time this divisor is used.  */
5533 if (!div_milli[divisor][unsignedp])
5535 div_milli[divisor][unsignedp] = 1;
5537 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5539 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5543 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5544 INTVAL (operands[0]));
5545 return output_millicode_call (insn,
5546 gen_rtx_SYMBOL_REF (SImode, buf));
5550 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5551 INTVAL (operands[0]));
5552 return output_millicode_call (insn,
5553 gen_rtx_SYMBOL_REF (SImode, buf));
5556 /* Divisor isn't a special constant. */
5561 import_milli (divU);
5562 return output_millicode_call (insn,
5563 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5567 import_milli (divI);
5568 return output_millicode_call (insn,
5569 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5574 /* Output a $$rem millicode to do mod. */
/* Output a $$remU/$$remI millicode call for modulo, choosing the
   unsigned or signed routine per UNSIGNEDP.  */
5577 output_mod_insn (int unsignedp, rtx insn)
5581 import_milli (remU);
5582 return output_millicode_call (insn,
5583 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5587 import_milli (remI);
5588 return output_millicode_call (insn,
5589 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the ".CALL ARGWn=..." argument-location descriptor for
   CALL_INSN, derived from the USEs in CALL_INSN_FUNCTION_USAGE.
   Not needed for the 64-bit runtime or ELF32; the portable runtime
   always declares "no relocations".  */
5594 output_arg_descriptor (rtx call_insn)
5596 const char *arg_regs[4];
5597 enum machine_mode arg_mode;
5599 int i, output_flag = 0;
5602 /* We neither need nor want argument location descriptors for the
5603 64bit runtime environment or the ELF32 environment. */
5604 if (TARGET_64BIT || TARGET_ELF32)
5607 for (i = 0; i < 4; i++)
5610 /* Specify explicitly that no argument relocations should take place
5611 if using the portable runtime calling conventions. */
5612 if (TARGET_PORTABLE_RUNTIME)
5614 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5619 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5620 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5621 link; link = XEXP (link, 1))
5623 rtx use = XEXP (link, 0);
5625 if (! (GET_CODE (use) == USE
5626 && GET_CODE (XEXP (use, 0)) == REG
5627 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5630 arg_mode = GET_MODE (XEXP (use, 0));
5631 regno = REGNO (XEXP (use, 0));
/* General argument registers %r26..%r23 map to ARGW0..ARGW3;
   a DImode argument occupies two consecutive words.  */
5632 if (regno >= 23 && regno <= 26)
5634 arg_regs[26 - regno] = "GR";
5635 if (arg_mode == DImode)
5636 arg_regs[25 - regno] = "GR";
5638 else if (regno >= 32 && regno <= 39)
5640 if (arg_mode == SFmode)
5641 arg_regs[(regno - 32) / 2] = "FR";
/* Double-precision FP args: FR/FU word order depends on whether
   HP's descriptor convention is reversed.  */
5644 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5645 arg_regs[(regno - 34) / 2] = "FR";
5646 arg_regs[(regno - 34) / 2 + 1] = "FU";
5648 arg_regs[(regno - 34) / 2] = "FU";
5649 arg_regs[(regno - 34) / 2 + 1] = "FR";
5654 fputs ("\t.CALL ", asm_out_file);
5655 for (i = 0; i < 4; i++)
5660 fputc (',', asm_out_file);
5661 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5664 fputc ('\n', asm_out_file);
/* Implement TARGET_SECONDARY_RELOAD.  Decide whether moving X
   to/from class RCLASS in MODE needs a scratch register or special
   reload pattern, and record the pattern in SRI when it does.
   Returns the class of the required intermediate register (or
   NO_REGS via elided paths when none is needed).  */
static enum reg_class
pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
5669 enum machine_mode mode, secondary_reload_info *sri)
5671 int is_symbolic, regno;
5673 /* Handle the easy stuff first. */
5674 if (rclass == R1_REGS)
5680 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5686 /* If we have something like (mem (mem (...)), we can safely assume the
5687 inner MEM will end up in a general register after reloading, so there's
5688 no need for a secondary reload. */
5689 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5692 /* Trying to load a constant into a FP register during PIC code
5693 generation requires %r1 as a scratch register. */
5695 && (mode == SImode || mode == DImode)
5696 && FP_REG_CLASS_P (rclass)
5697 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5699 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5700 : CODE_FOR_reload_indi_r1);
5704 /* Profiling showed the PA port spends about 1.3% of its compilation
5705 time in true_regnum from calls inside pa_secondary_reload_class. */
5706 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5707 regno = true_regnum (x);
5709 /* In order to allow 14-bit displacements in integer loads and stores,
5710 we need to prevent reload from generating out of range integer mode
5711 loads and stores to the floating point registers. Previously, we
5712 used to call for a secondary reload and have emit_move_sequence()
5713 fix the instruction sequence. However, reload occasionally wouldn't
5714 generate the reload and we would end up with an invalid REG+D memory
5715 address. So, now we use an intermediate general register for most
5716 memory loads and stores. */
5717 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5718 && GET_MODE_CLASS (mode) == MODE_INT
5719 && FP_REG_CLASS_P (rclass))
5721 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5722 the secondary reload needed for a pseudo. It never passes a
5724 if (GET_CODE (x) == MEM)
5728 /* We don't need an intermediate for indexed and LO_SUM DLT
5729 memory addresses. When INT14_OK_STRICT is true, it might
5730 appear that we could directly allow register indirect
5731 memory addresses. However, this doesn't work because we
5732 don't support SUBREGs in floating-point register copies
5733 and reload doesn't tell us when it's going to use a SUBREG. */
5734 if (IS_INDEX_ADDR_P (x)
5735 || IS_LO_SUM_DLT_ADDR_P (x))
5738 /* Otherwise, we need an intermediate general register. */
5739 return GENERAL_REGS;
5742 /* Request a secondary reload with a general scratch register
5743 for everything else. ??? Could symbolic operands be handled
5744 directly when generating non-pic PA 2.0 code? */
5745 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5749 /* We need a secondary register (GPR) for copies between the SAR
5750 and anything other than a general register. */
5751 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5753 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5757 /* A SAR<->FP register copy requires a secondary register (GPR) as
5758 well as secondary memory. */
5759 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5760 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5761 && FP_REG_CLASS_P (rclass)))
5763 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5767 /* Secondary reloads of symbolic operands require %r1 as a scratch
5768 register when we're generating PIC code and when the operand isn't
5770 if (GET_CODE (x) == HIGH)
5773 /* Profiling has showed GCC spends about 2.6% of its compilation
5774 time in symbolic_operand from calls inside pa_secondary_reload_class.
5775 So, we use an inline copy to avoid useless work. */
5776 switch (GET_CODE (x))
5781 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5788 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5789 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5790 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5791 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5798 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5800 gcc_assert (mode == SImode || mode == DImode);
5801 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5802 : CODE_FOR_reload_indi_r1);
5808 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5809 is only marked as live on entry by df-scan when it is a fixed
5810 register. It isn't a fixed register in the 64-bit runtime,
5811 so we need to mark it here. */
/* Mark the argument pointer live on entry (64-bit runtime only,
   where it is not a fixed register; see comment above).  */
5814 pa_extra_live_on_entry (bitmap regs)
5817 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5820 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5821 to prevent it from being deleted. */
/* Return the MEM holding the EH return handler address: a word at
   frame pointer - 16 (64-bit) or - 20 (32-bit), made volatile (per
   the comment above) so it is not deleted.  */
5824 pa_eh_return_handler_rtx (void)
5828 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5829 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5830 tmp = gen_rtx_MEM (word_mode, tmp);
5835 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5836 by invisible reference. As a GCC extension, we also pass anything
5837 with a zero or variable size by reference.
5839 The 64-bit runtime does not describe passing any types by invisible
5840 reference. The internals of GCC can't currently handle passing
5841 empty structures, and zero or variable length arrays when they are
5842 not passed entirely on the stack or by reference. Thus, as a GCC
5843 extension, we pass these types by reference. The HP compiler doesn't
5844 support these types, so hopefully there shouldn't be any compatibility
5845 issues. This may have to be revisited when HP releases a C99 compiler
5846 or updates the ABI. */
/* Implement TARGET_PASS_BY_REFERENCE: pass by invisible reference
   anything with zero/variable size or larger than eight bytes (see
   the 32-/64-bit runtime discussion above).  */
5849 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5850 enum machine_mode mode, const_tree type,
5851 bool named ATTRIBUTE_UNUSED)
5856 size = int_size_in_bytes (type);
5858 size = GET_MODE_SIZE (mode);
5863 return size <= 0 || size > 8;
/* Return the padding direction for an argument of MODE/TYPE:
   the 64-bit runtime left-justifies aggregates, the 32-bit runtime
   right-justifies (padding on the left); no padding when the size
   is already a multiple of PARM_BOUNDARY.  */
5867 function_arg_padding (enum machine_mode mode, const_tree type)
5872 && (AGGREGATE_TYPE_P (type)
5873 || TREE_CODE (type) == COMPLEX_TYPE
5874 || TREE_CODE (type) == VECTOR_TYPE)))
5876 /* Return none if justification is not required. */
5878 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5879 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5882 /* The directions set here are ignored when a BLKmode argument larger
5883 than a word is placed in a register. Different code is used for
5884 the stack and registers. This makes it difficult to have a
5885 consistent data representation for both the stack and registers.
5886 For both runtimes, the justification and padding for arguments on
5887 the stack and in registers should be identical. */
5889 /* The 64-bit runtime specifies left justification for aggregates. */
5892 /* The 32-bit runtime architecture specifies right justification.
5893 When the argument is passed on the stack, the argument is padded
5894 with garbage on the left. The HP compiler pads with zeros. */
5898 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5905 /* Do what is necessary for `va_start'. We look at the current function
5906 to determine if stdargs or varargs is used and fill in an initial
5907 va_list. A pointer to this constructor is returned. */
/* Implement the saveregs builtin for va_start: flush the anonymous
   argument registers to the stack and return a pointer to the first
   anonymous argument.  NOTE(review): this listing is elided — both
   the 64-bit path (stores %r26..%r19 at arg_pointer-64) and the
   32-bit path (move_block_from_reg of 4 GRs) appear below, but the
   TARGET_64BIT branch structure separating them is not visible.  */
5910 hppa_builtin_saveregs (void)
5913 tree fntype = TREE_TYPE (current_function_decl);
/* Skip back one word if the last declared parameter ends the fixed
   argument list (i.e. the function is stdarg, not unprototyped).  */
5914 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5915 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5916 != void_type_node)))
5917 ? UNITS_PER_WORD : 0);
5920 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5922 offset = crtl->args.arg_offset_rtx;
5928 /* Adjust for varargs/stdarg differences. */
5930 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5932 offset = crtl->args.arg_offset_rtx;
5934 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5935 from the incoming arg pointer and growing to larger addresses. */
5936 for (i = 26, off = -64; i >= 19; i--, off += 8)
5937 emit_move_insn (gen_rtx_MEM (word_mode,
5938 plus_constant (arg_pointer_rtx, off)),
5939 gen_rtx_REG (word_mode, i));
5941 /* The incoming args pointer points just beyond the flushback area;
5942 normally this is not a serious concern. However, when we are doing
5943 varargs/stdargs we want to make the arg pointer point to the start
5944 of the incoming argument area. */
5945 emit_move_insn (virtual_incoming_args_rtx,
5946 plus_constant (arg_pointer_rtx, -64));
5948 /* Now return a pointer to the first anonymous argument. */
5949 return copy_to_reg (expand_binop (Pmode, add_optab,
5950 virtual_incoming_args_rtx,
5951 offset, 0, 0, OPTAB_LIB_WIDEN));
5954 /* Store general registers on the stack. */
5955 dest = gen_rtx_MEM (BLKmode,
5956 plus_constant (crtl->args.internal_arg_pointer,
5958 set_mem_alias_set (dest, get_varargs_alias_set ());
5959 set_mem_align (dest, BITS_PER_WORD);
5960 move_block_from_reg (23, dest, 4);
5962 /* move_block_from_reg will emit code to store the argument registers
5963 individually as scalar stores.
5965 However, other insns may later load from the same addresses for
5966 a structure load (passing a struct to a varargs routine).
5968 The alias code assumes that such aliasing can never happen, so we
5969 have to keep memory referencing insns from moving up beyond the
5970 last argument register store. So we emit a blockage insn here. */
5971 emit_insn (gen_blockage ());
5973 return copy_to_reg (expand_binop (Pmode, add_optab,
5974 crtl->args.internal_arg_pointer,
5975 offset, 0, 0, OPTAB_LIB_WIDEN));
/* Implement TARGET_EXPAND_BUILTIN_VA_START: flush argument registers
   via hppa_builtin_saveregs, then do the standard va_start setup.  */
5979 hppa_va_start (tree valist, rtx nextarg)
5981 nextarg = expand_builtin_saveregs ();
5982 std_expand_builtin_va_start (valist, nextarg);
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  The 64-bit runtime grows
   args upward and can use the generic code; the 32-bit runtime grows
   args downward, so decrement and align valist by hand, honoring
   pass-by-reference for large/variable-size types.  */
5986 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5991 /* Args grow upward. We can use the generic routines. */
5992 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5994 else /* !TARGET_64BIT */
5996 tree ptr = build_pointer_type (type);
5999 unsigned int size, ofs;
6002 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6006 ptr = build_pointer_type (type);
6008 size = int_size_in_bytes (type);
6009 valist_type = TREE_TYPE (valist);
6011 /* Args grow down. Not handled by generic routines. */
6013 u = fold_convert (sizetype, size_in_bytes (type));
6014 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6015 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6017 /* Copied from va-pa.h, but we probably don't need to align to
6018 word size, since we generate and preserve that invariant. */
6019 u = size_int (size > 4 ? -8 : -4);
6020 t = fold_convert (sizetype, t);
6021 t = build2 (BIT_AND_EXPR, sizetype, t, u);
6022 t = fold_convert (valist_type, t);
6024 t = build2 (MODIFY_EXPR, valist_type, valist, t);
/* Small args are right-justified within their 4-byte slot.  */
6026 ofs = (8 - size) % 4;
6030 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6033 t = fold_convert (ptr, t);
6034 t = build_va_arg_indirect_ref (t);
6037 t = build_va_arg_indirect_ref (t);
6043 /* True if MODE is valid for the target. By "valid", we mean able to
6044 be manipulated in non-trivial ways. In particular, this means all
6045 the arithmetic is supported.
6047 Currently, TImode is not valid as the HP 64-bit runtime documentation
6048 doesn't document the alignment and calling conventions for this type.
6049 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6050 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6053 pa_scalar_mode_supported_p (enum machine_mode mode)
6055 int precision = GET_MODE_PRECISION (mode);
6057 switch (GET_MODE_CLASS (mode))
6059 case MODE_PARTIAL_INT:
/* Integer modes: accept only precisions that map onto one of the
   C integer types for this target.  */
6061 if (precision == CHAR_TYPE_SIZE)
6063 if (precision == SHORT_TYPE_SIZE)
6065 if (precision == INT_TYPE_SIZE)
6067 if (precision == LONG_TYPE_SIZE)
6069 if (precision == LONG_LONG_TYPE_SIZE)
/* Float modes: likewise, only the precisions of the target's
   float, double and long double are supported.  */
6074 if (precision == FLOAT_TYPE_SIZE)
6076 if (precision == DOUBLE_TYPE_SIZE)
6078 if (precision == LONG_DOUBLE_TYPE_SIZE)
6082 case MODE_DECIMAL_FLOAT:
6090 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6091 it branches to the next real instruction. Otherwise, return FALSE. */
6094 branch_to_delay_slot_p (rtx insn)
/* A nonzero dbr_sequence_length means the delay slot is filled,
   so the degenerate branch-to-next-insn case cannot apply.  */
6096 if (dbr_sequence_length ())
/* Branch target and fall-through resolve to the same real insn.  */
6099 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6102 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6104 This occurs when INSN has an unfilled delay slot and is followed
6105 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6106 the jump branches into the delay slot. So, we add a nop in the delay
6107 slot just to be safe. This messes up our instruction count, but we
6108 don't know how big the ASM_INPUT insn is anyway. */
6111 branch_needs_nop_p (rtx insn)
/* Filled delay slot: no nop needed.  */
6115 if (dbr_sequence_length ())
6118 next_insn = next_real_insn (insn);
6119 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6122 /* This routine handles all the normal conditional branch sequences we
6123 might need to generate. It handles compare immediate vs compare
6124 register, nullification of delay slots, varying length branches,
6125 negated branches, and all combinations of the above. It returns the
6126 output appropriate to emit the branch corresponding to all given
6130 output_cbranch (rtx *operands, int negated, rtx insn)
/* BUF accumulates the assembler template; returned to final.  */
6132 static char buf[100];
/* Nonzero when the branch annuls (nullifies) its delay slot.  */
6134 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6135 int length = get_attr_length (insn);
6138 /* A conditional branch to the following instruction (e.g. the delay slot)
6139 is asking for a disaster. This can happen when not optimizing and
6140 when jump optimization fails.
6142 While it is usually safe to emit nothing, this can fail if the
6143 preceding instruction is a nullified branch with an empty delay
6144 slot and the same branch target as this branch. We could check
6145 for this but jump optimization should eliminate nop jumps. It
6146 is always safe to emit a nop. */
6147 if (branch_to_delay_slot_p (insn))
6150 /* The doubleword form of the cmpib instruction doesn't have the LEU
6151 and GTU conditions while the cmpb instruction does. Since we accept
6152 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6153 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6154 operands[2] = gen_rtx_REG (DImode, 0)
6155 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6156 operands[1] = gen_rtx_REG (DImode, 0);
6158 /* If this is a long branch with its delay slot unfilled, set `nullify'
6159 as it can nullify the delay slot and save a nop. */
6160 if (length == 8 && dbr_sequence_length () == 0)
6163 /* If this is a short forward conditional branch which did not get
6164 its delay slot filled, the delay slot can still be nullified. */
6165 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6166 nullify = forward_branch_p (insn)
6168 /* A forward branch over a single nullified insn can be done with a
6169 comclr instruction. This avoids a single cycle penalty due to
6170 mis-predicted branch if we fall through (branch not taken). */
6172 && next_real_insn (insn) != 0
6173 && get_attr_length (next_real_insn (insn)) == 4
6174 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6180 /* All short conditional branches except backwards with an unfilled
/* "{...|...}" selects between pre-2.0 (com*) and PA 2.0 (cmp*)
   mnemonics; final.c picks one based on assembler dialect.  */
6184 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6186 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6187 if (GET_MODE (operands[1]) == DImode)
6190 strcat (buf, "%B3");
6192 strcat (buf, "%S3");
6194 strcat (buf, " %2,%r1,%%r0");
6197 if (branch_needs_nop_p (insn))
/* "%#" emits a delay-slot nop when needed.  */
6198 strcat (buf, ",n %2,%r1,%0%#");
6200 strcat (buf, ",n %2,%r1,%0");
6203 strcat (buf, " %2,%r1,%0");
6206 /* All long conditionals. Note a short backward branch with an
6207 unfilled delay slot is treated just like a long backward branch
6208 with an unfilled delay slot. */
6210 /* Handle weird backwards branch with a filled delay slot
6211 which is nullified. */
6212 if (dbr_sequence_length () != 0
6213 && ! forward_branch_p (insn)
/* Branch around the out-of-line unconditional branch to %0.  */
6216 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6217 if (GET_MODE (operands[1]) == DImode)
6220 strcat (buf, "%S3");
6222 strcat (buf, "%B3");
6223 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6225 /* Handle short backwards branch with an unfilled delay slot.
6226 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6227 taken and untaken branches. */
6228 else if (dbr_sequence_length () == 0
6229 && ! forward_branch_p (insn)
6230 && INSN_ADDRESSES_SET_P ()
6231 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6232 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6234 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6235 if (GET_MODE (operands[1]) == DImode)
6238 strcat (buf, "%B3 %2,%r1,%0%#");
6240 strcat (buf, "%S3 %2,%r1,%0%#");
/* General long-branch case: skip over an unconditional branch
   with a comclr/cmpclr.  */
6244 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6245 if (GET_MODE (operands[1]) == DImode)
6248 strcat (buf, "%S3");
6250 strcat (buf, "%B3");
6252 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6254 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6259 /* The reversed conditional branch must branch over one additional
6260 instruction if the delay slot is filled and needs to be extracted
6261 by output_lbranch. If the delay slot is empty or this is a
6262 nullified forward branch, the instruction after the reversed
6263 condition branch must be nullified. */
6264 if (dbr_sequence_length () == 0
6265 || (nullify && forward_branch_p (insn)))
/* operands[4] is the byte offset the reversed branch must skip.  */
6269 operands[4] = GEN_INT (length);
6274 operands[4] = GEN_INT (length + 4);
6277 /* Create a reversed conditional branch which branches around
6278 the following insns. */
6279 if (GET_MODE (operands[1]) != DImode)
6285 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6288 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6294 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6297 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
/* DImode variants use the ",*" 64-bit condition completer.  */
6306 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6309 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6315 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6318 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
/* Emit the reversed branch, then the very long branch to %0.  */
6322 output_asm_insn (buf, operands);
6323 return output_lbranch (operands[0], insn, xdelay);
6328 /* This routine handles output of long unconditional branches that
6329 exceed the maximum range of a simple branch instruction. Since
6330 we don't have a register available for the branch, we save register
6331 %r1 in the frame marker, load the branch destination DEST into %r1,
6332 execute the branch, and restore %r1 in the delay slot of the branch.
6334 Since long branches may have an insn in the delay slot and the
6335 delay slot is used to restore %r1, we in general need to extract
6336 this insn and execute it before the branch. However, to facilitate
6337 use of this function by conditional branches, we also provide an
6338 option to not extract the delay insn so that it will be emitted
6339 after the long branch. So, if there is an insn in the delay slot,
6340 it is extracted if XDELAY is nonzero.
6342 The lengths of the various long-branch sequences are 20, 16 and 24
6343 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6346 output_lbranch (rtx dest, rtx insn, int xdelay)
6350 xoperands[0] = dest;
6352 /* First, free up the delay slot. */
6353 if (xdelay && dbr_sequence_length () != 0)
6355 /* We can't handle a jump in the delay slot. */
6356 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
/* Emit the delay-slot insn ahead of the branch ...  */
6358 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6361 /* Now delete the delay insn. */
6362 SET_INSN_DELETED (NEXT_INSN (insn));
6365 /* Output an insn to save %r1. The runtime documentation doesn't
6366 specify whether the "Clean Up" slot in the callers frame can
6367 be clobbered by the callee. It isn't copied by HP's builtin
6368 alloca, so this suggests that it can be clobbered if necessary.
6369 The "Static Link" location is copied by HP builtin alloca, so
6370 we avoid using it. Using the cleanup slot might be a problem
6371 if we have to interoperate with languages that pass cleanup
6372 information. However, it should be possible to handle these
6373 situations with GCC's asm feature.
6375 The "Current RP" slot is reserved for the called procedure, so
6376 we try to use it when we don't have a frame of our own. It's
6377 rather unlikely that we won't have a frame when we need to emit
6380 Really the way to go long term is a register scavenger; goto
6381 the target of the jump and find a register which we can use
6382 as a scratch to hold the value in %r1. Then, we wouldn't have
6383 to free up the delay slot or clobber a slot that may be needed
6384 for other purposes. */
/* 64-bit save of %r1 (std); slot choice depends on whether this
   function has a frame (%r2 live / nonzero frame size).  */
6387 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6388 /* Use the return pointer slot in the frame marker. */
6389 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6391 /* Use the slot at -40 in the frame marker since HP builtin
6392 alloca doesn't copy it. */
6393 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
/* 32-bit save of %r1 (stw).  */
6397 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6398 /* Use the return pointer slot in the frame marker. */
6399 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6401 /* Use the "Clean Up" slot in the frame marker. In GCC,
6402 the only other use of this location is for copying a
6403 floating point double argument from a floating-point
6404 register to two general registers. The copy is done
6405 as an "atomic" operation when outputting a call, so it
6406 won't interfere with our using the location here. */
6407 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
/* Portable runtime: absolute address via ldil/ldo, branch via bv.  */
6410 if (TARGET_PORTABLE_RUNTIME)
6412 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6413 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6414 output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* PIC: materialize pc into %r1 with bl .+8, then add the
   pc-relative displacement to the target label.  */
6418 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6419 if (TARGET_SOM || !TARGET_GAS)
6421 xoperands[1] = gen_label_rtx ();
6422 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6423 targetm.asm_out.internal_label (asm_out_file, "L",
6424 CODE_LABEL_NUMBER (xoperands[1]));
6425 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6429 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6430 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6432 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6435 /* Now output a very long branch to the original target. */
6436 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6438 /* Now restore the value of %r1 in the delay slot. */
/* Restore templates mirror the save-slot choice made above.  */
6441 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6442 return "ldd -16(%%r30),%%r1";
6444 return "ldd -40(%%r30),%%r1";
6448 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6449 return "ldw -20(%%r30),%%r1";
6451 return "ldw -12(%%r30),%%r1";
6455 /* This routine handles all the branch-on-bit conditional branch sequences we
6456 might need to generate. It handles nullification of delay slots,
6457 varying length branches, negated branches and all combinations of the
6458 above. it returns the appropriate output template to emit the branch. */
6461 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6463 static char buf[100];
6465 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6466 int length = get_attr_length (insn);
6469 /* A conditional branch to the following instruction (e.g. the delay slot) is
6470 asking for a disaster. I do not think this can happen as this pattern
6471 is only used when optimizing; jump optimization should eliminate the
6472 jump. But be prepared just in case. */
6474 if (branch_to_delay_slot_p (insn))
6477 /* If this is a long branch with its delay slot unfilled, set `nullify'
6478 as it can nullify the delay slot and save a nop. */
6479 if (length == 8 && dbr_sequence_length () == 0)
6482 /* If this is a short forward conditional branch which did not get
6483 its delay slot filled, the delay slot can still be nullified. */
6484 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6485 nullify = forward_branch_p (insn);
6487 /* A forward branch over a single nullified insn can be done with a
6488 extrs instruction. This avoids a single cycle penalty due to
6489 mis-predicted branch if we fall through (branch not taken). */
6492 && next_real_insn (insn) != 0
6493 && get_attr_length (next_real_insn (insn)) == 4
6494 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6501 /* All short conditional branches except backwards with an unfilled
/* USESKIP selects the extract-and-skip form (extrs/extrw) over a
   real bb branch; ",*" marks 64-bit (DImode) completers.  */
6505 strcpy (buf, "{extrs,|extrw,s,}");
6507 strcpy (buf, "bb,");
6508 if (useskip && GET_MODE (operands[0]) == DImode)
6509 strcpy (buf, "extrd,s,*");
6510 else if (GET_MODE (operands[0]) == DImode)
6511 strcpy (buf, "bb,*");
/* WHICH selects which operand label is the taken target; combined
   with NEGATED it picks the branch condition sense.  */
6512 if ((which == 0 && negated)
6513 || (which == 1 && ! negated))
6518 strcat (buf, " %0,%1,1,%%r0");
6519 else if (nullify && negated)
6521 if (branch_needs_nop_p (insn))
6522 strcat (buf, ",n %0,%1,%3%#");
6524 strcat (buf, ",n %0,%1,%3");
6526 else if (nullify && ! negated)
6528 if (branch_needs_nop_p (insn))
6529 strcat (buf, ",n %0,%1,%2%#");
6531 strcat (buf, ",n %0,%1,%2");
6533 else if (! nullify && negated)
6534 strcat (buf, " %0,%1,%3");
6535 else if (! nullify && ! negated)
6536 strcat (buf, " %0,%1,%2");
6539 /* All long conditionals. Note a short backward branch with an
6540 unfilled delay slot is treated just like a long backward branch
6541 with an unfilled delay slot. */
6543 /* Handle weird backwards branch with a filled delay slot
6544 which is nullified. */
6545 if (dbr_sequence_length () != 0
6546 && ! forward_branch_p (insn)
6549 strcpy (buf, "bb,");
6550 if (GET_MODE (operands[0]) == DImode)
6552 if ((which == 0 && negated)
6553 || (which == 1 && ! negated))
6558 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6560 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6562 /* Handle short backwards branch with an unfilled delay slot.
6563 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6564 taken and untaken branches. */
6565 else if (dbr_sequence_length () == 0
6566 && ! forward_branch_p (insn)
6567 && INSN_ADDRESSES_SET_P ()
6568 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6569 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6571 strcpy (buf, "bb,");
6572 if (GET_MODE (operands[0]) == DImode)
6574 if ((which == 0 && negated)
6575 || (which == 1 && ! negated))
6580 strcat (buf, " %0,%1,%3%#");
6582 strcat (buf, " %0,%1,%2%#");
/* General long branch: extract-and-skip over an unconditional b.  */
6586 if (GET_MODE (operands[0]) == DImode)
6587 strcpy (buf, "extrd,s,*");
6589 strcpy (buf, "{extrs,|extrw,s,}");
6590 if ((which == 0 && negated)
6591 || (which == 1 && ! negated))
6595 if (nullify && negated)
6596 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6597 else if (nullify && ! negated)
6598 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6600 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6602 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6607 /* The reversed conditional branch must branch over one additional
6608 instruction if the delay slot is filled and needs to be extracted
6609 by output_lbranch. If the delay slot is empty or this is a
6610 nullified forward branch, the instruction after the reversed
6611 condition branch must be nullified. */
6612 if (dbr_sequence_length () == 0
6613 || (nullify && forward_branch_p (insn)))
6617 operands[4] = GEN_INT (length);
6622 operands[4] = GEN_INT (length + 4);
6625 if (GET_MODE (operands[0]) == DImode)
6626 strcpy (buf, "bb,*");
6628 strcpy (buf, "bb,");
6629 if ((which == 0 && negated)
6630 || (which == 1 && !negated))
6635 strcat (buf, ",n %0,%1,.+%4");
6637 strcat (buf, " %0,%1,.+%4");
/* Reversed short branch around the out-of-range long branch.  */
6638 output_asm_insn (buf, operands);
6639 return output_lbranch (negated ? operands[3] : operands[2],
6645 /* This routine handles all the branch-on-variable-bit conditional branch
6646 sequences we might need to generate. It handles nullification of delay
6647 slots, varying length branches, negated branches and all combinations
6648 of the above. it returns the appropriate output template to emit the
6652 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6654 static char buf[100];
6656 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6657 int length = get_attr_length (insn);
6660 /* A conditional branch to the following instruction (e.g. the delay slot) is
6661 asking for a disaster. I do not think this can happen as this pattern
6662 is only used when optimizing; jump optimization should eliminate the
6663 jump. But be prepared just in case. */
6665 if (branch_to_delay_slot_p (insn))
6668 /* If this is a long branch with its delay slot unfilled, set `nullify'
6669 as it can nullify the delay slot and save a nop. */
6670 if (length == 8 && dbr_sequence_length () == 0)
6673 /* If this is a short forward conditional branch which did not get
6674 its delay slot filled, the delay slot can still be nullified. */
6675 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6676 nullify = forward_branch_p (insn);
6678 /* A forward branch over a single nullified insn can be done with a
6679 extrs instruction. This avoids a single cycle penalty due to
6680 mis-predicted branch if we fall through (branch not taken). */
6683 && next_real_insn (insn) != 0
6684 && get_attr_length (next_real_insn (insn)) == 4
6685 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6692 /* All short conditional branches except backwards with an unfilled
/* Variable-bit forms: the bit position comes from %sar, hence
   the bvb (pre-2.0) vs bb ...,%sar (PA 2.0) dialect pairs.  */
6696 strcpy (buf, "{vextrs,|extrw,s,}");
6698 strcpy (buf, "{bvb,|bb,}");
6699 if (useskip && GET_MODE (operands[0]) == DImode)
6700 strcpy (buf, "extrd,s,*");
6701 else if (GET_MODE (operands[0]) == DImode)
6702 strcpy (buf, "bb,*");
6703 if ((which == 0 && negated)
6704 || (which == 1 && ! negated))
6709 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6710 else if (nullify && negated)
6712 if (branch_needs_nop_p (insn))
6713 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6715 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6717 else if (nullify && ! negated)
6719 if (branch_needs_nop_p (insn))
6720 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6722 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6724 else if (! nullify && negated)
6725 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6726 else if (! nullify && ! negated)
6727 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6730 /* All long conditionals. Note a short backward branch with an
6731 unfilled delay slot is treated just like a long backward branch
6732 with an unfilled delay slot. */
6734 /* Handle weird backwards branch with a filled delay slot
6735 which is nullified. */
6736 if (dbr_sequence_length () != 0
6737 && ! forward_branch_p (insn)
6740 strcpy (buf, "{bvb,|bb,}");
6741 if (GET_MODE (operands[0]) == DImode)
6743 if ((which == 0 && negated)
6744 || (which == 1 && ! negated))
6749 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6751 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6753 /* Handle short backwards branch with an unfilled delay slot.
6754 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6755 taken and untaken branches. */
6756 else if (dbr_sequence_length () == 0
6757 && ! forward_branch_p (insn)
6758 && INSN_ADDRESSES_SET_P ()
6759 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6760 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6762 strcpy (buf, "{bvb,|bb,}");
6763 if (GET_MODE (operands[0]) == DImode)
6765 if ((which == 0 && negated)
6766 || (which == 1 && ! negated))
6771 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6773 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
/* General long branch: variable-bit extract-and-skip then b.  */
6777 strcpy (buf, "{vextrs,|extrw,s,}");
6778 if (GET_MODE (operands[0]) == DImode)
6779 strcpy (buf, "extrd,s,*");
6780 if ((which == 0 && negated)
6781 || (which == 1 && ! negated))
6785 if (nullify && negated)
6786 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6787 else if (nullify && ! negated)
6788 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6790 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6792 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6797 /* The reversed conditional branch must branch over one additional
6798 instruction if the delay slot is filled and needs to be extracted
6799 by output_lbranch. If the delay slot is empty or this is a
6800 nullified forward branch, the instruction after the reversed
6801 condition branch must be nullified. */
6802 if (dbr_sequence_length () == 0
6803 || (nullify && forward_branch_p (insn)))
6807 operands[4] = GEN_INT (length);
6812 operands[4] = GEN_INT (length + 4);
6815 if (GET_MODE (operands[0]) == DImode)
6816 strcpy (buf, "bb,*");
6818 strcpy (buf, "{bvb,|bb,}");
6819 if ((which == 0 && negated)
6820 || (which == 1 && !negated))
6825 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6827 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6828 output_asm_insn (buf, operands);
6829 return output_lbranch (negated ? operands[3] : operands[2],
6835 /* Return the output template for emitting a dbra type insn.
6837 Note it may perform some output operations on its own before
6838 returning the final output string. */
6840 output_dbra (rtx *operands, rtx insn, int which_alternative)
6842 int length = get_attr_length (insn);
6844 /* A conditional branch to the following instruction (e.g. the delay slot) is
6845 asking for a disaster. Be prepared! */
/* Degenerate case: the decrement still happens, but no branch is
   emitted; the code depends on which register class holds the
   loop counter (GR, FP-via-memory, or MEM).  */
6847 if (branch_to_delay_slot_p (insn))
6849 if (which_alternative == 0)
6850 return "ldo %1(%0),%0";
6851 else if (which_alternative == 1)
6853 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6854 output_asm_insn ("ldw -16(%%r30),%4", operands);
6855 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6856 return "{fldws|fldw} -16(%%r30),%0";
6860 output_asm_insn ("ldw %0,%4", operands);
6861 return "ldo %1(%4),%4\n\tstw %4,%0";
/* Alternative 0: counter in a general register — addib forms.  */
6865 if (which_alternative == 0)
6867 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6870 /* If this is a long branch with its delay slot unfilled, set `nullify'
6871 as it can nullify the delay slot and save a nop. */
6872 if (length == 8 && dbr_sequence_length () == 0)
6875 /* If this is a short forward conditional branch which did not get
6876 its delay slot filled, the delay slot can still be nullified. */
6877 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6878 nullify = forward_branch_p (insn);
6885 if (branch_needs_nop_p (insn))
6886 return "addib,%C2,n %1,%0,%3%#";
6888 return "addib,%C2,n %1,%0,%3";
6891 return "addib,%C2 %1,%0,%3";
6894 /* Handle weird backwards branch with a fulled delay slot
6895 which is nullified. */
6896 if (dbr_sequence_length () != 0
6897 && ! forward_branch_p (insn)
6899 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6900 /* Handle short backwards branch with an unfilled delay slot.
6901 Using a addb;nop rather than addi;bl saves 1 cycle for both
6902 taken and untaken branches. */
6903 else if (dbr_sequence_length () == 0
6904 && ! forward_branch_p (insn)
6905 && INSN_ADDRESSES_SET_P ()
6906 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6907 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6908 return "addib,%C2 %1,%0,%3%#";
6910 /* Handle normal cases. */
6912 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6914 return "addi,%N2 %1,%0,%0\n\tb %3";
6917 /* The reversed conditional branch must branch over one additional
6918 instruction if the delay slot is filled and needs to be extracted
6919 by output_lbranch. If the delay slot is empty or this is a
6920 nullified forward branch, the instruction after the reversed
6921 condition branch must be nullified. */
6922 if (dbr_sequence_length () == 0
6923 || (nullify && forward_branch_p (insn)))
6927 operands[4] = GEN_INT (length);
6932 operands[4] = GEN_INT (length + 4);
6936 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6938 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6940 return output_lbranch (operands[3], insn, xdelay);
6944 /* Deal with gross reload from FP register case. */
6945 else if (which_alternative == 1)
6947 /* Move loop counter from FP register to MEM then into a GR,
6948 increment the GR, store the GR into MEM, and finally reload
6949 the FP register from MEM from within the branch's delay slot. */
6950 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6952 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6954 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6955 else if (length == 28)
6956 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6959 operands[5] = GEN_INT (length - 16);
6960 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6961 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6962 return output_lbranch (operands[3], insn, 0);
6965 /* Deal with gross reload from memory case. */
6968 /* Reload loop counter from memory, the store back to memory
6969 happens in the branch's delay slot. */
6970 output_asm_insn ("ldw %0,%4", operands);
6972 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6973 else if (length == 16)
6974 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6977 operands[5] = GEN_INT (length - 4);
6978 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6979 return output_lbranch (operands[3], insn, 0);
6984 /* Return the output template for emitting a movb type insn.
6986 Note it may perform some output operations on its own before
6987 returning the final output string. */
6989 output_movb (rtx *operands, rtx insn, int which_alternative,
6990 int reverse_comparison)
6992 int length = get_attr_length (insn);
6994 /* A conditional branch to the following instruction (e.g. the delay slot) is
6995 asking for a disaster. Be prepared! */
/* Degenerate case: just perform the move; the form depends on the
   destination class (GR, FP register, MEM, or SAR).  */
6997 if (branch_to_delay_slot_p (insn))
6999 if (which_alternative == 0)
7000 return "copy %1,%0";
7001 else if (which_alternative == 1)
7003 output_asm_insn ("stw %1,-16(%%r30)", operands);
7004 return "{fldws|fldw} -16(%%r30),%0";
7006 else if (which_alternative == 2)
7012 /* Support the second variant. */
/* Condition is mutated in place on the operand rtx.  */
7013 if (reverse_comparison)
7014 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7016 if (which_alternative == 0)
7018 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7021 /* If this is a long branch with its delay slot unfilled, set `nullify'
7022 as it can nullify the delay slot and save a nop. */
7023 if (length == 8 && dbr_sequence_length () == 0)
7026 /* If this is a short forward conditional branch which did not get
7027 its delay slot filled, the delay slot can still be nullified. */
7028 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7029 nullify = forward_branch_p (insn);
7036 if (branch_needs_nop_p (insn))
7037 return "movb,%C2,n %1,%0,%3%#";
7039 return "movb,%C2,n %1,%0,%3";
7042 return "movb,%C2 %1,%0,%3";
7045 /* Handle weird backwards branch with a filled delay slot
7046 which is nullified. */
7047 if (dbr_sequence_length () != 0
7048 && ! forward_branch_p (insn)
7050 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7052 /* Handle short backwards branch with an unfilled delay slot.
7053 Using a movb;nop rather than or;bl saves 1 cycle for both
7054 taken and untaken branches. */
7055 else if (dbr_sequence_length () == 0
7056 && ! forward_branch_p (insn)
7057 && INSN_ADDRESSES_SET_P ()
7058 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7059 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7060 return "movb,%C2 %1,%0,%3%#";
7061 /* Handle normal cases. */
7063 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7065 return "or,%N2 %1,%%r0,%0\n\tb %3";
7068 /* The reversed conditional branch must branch over one additional
7069 instruction if the delay slot is filled and needs to be extracted
7070 by output_lbranch. If the delay slot is empty or this is a
7071 nullified forward branch, the instruction after the reversed
7072 condition branch must be nullified. */
7073 if (dbr_sequence_length () == 0
7074 || (nullify && forward_branch_p (insn)))
7078 operands[4] = GEN_INT (length);
7083 operands[4] = GEN_INT (length + 4);
7087 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7089 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7091 return output_lbranch (operands[3], insn, xdelay);
7094 /* Deal with gross reload for FP destination register case. */
7095 else if (which_alternative == 1)
7097 /* Move source register to MEM, perform the branch test, then
7098 finally load the FP register from MEM from within the branch's
7100 output_asm_insn ("stw %1,-16(%%r30)", operands);
7102 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7103 else if (length == 16)
7104 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7107 operands[4] = GEN_INT (length - 4);
7108 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7109 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7110 return output_lbranch (operands[3], insn, 0);
7113 /* Deal with gross reload from memory case. */
7114 else if (which_alternative == 2)
7116 /* Reload loop counter from memory, the store back to memory
7117 happens in the branch's delay slot. */
7119 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7120 else if (length == 12)
7121 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7124 operands[4] = GEN_INT (length);
7125 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7127 return output_lbranch (operands[3], insn, 0);
7130 /* Handle SAR as a destination. */
7134 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7135 else if (length == 12)
7136 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7139 operands[4] = GEN_INT (length);
7140 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7142 return output_lbranch (operands[3], insn, 0);
7147 /* Copy any FP arguments in INSN into integer registers. */
7149 copy_fp_args (rtx insn)
/* Walk the (USE (REG ...)) list attached to the call insn.  */
7154 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7156 int arg_mode, regno;
7157 rtx use = XEXP (link, 0);
/* Only plain register USEs of argument registers are of interest.  */
7159 if (! (GET_CODE (use) == USE
7160 && GET_CODE (XEXP (use, 0)) == REG
7161 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7164 arg_mode = GET_MODE (XEXP (use, 0));
7165 regno = REGNO (XEXP (use, 0));
7167 /* Is it a floating point register? */
7168 if (regno >= 32 && regno <= 39)
7170 /* Copy the FP register into an integer register via memory. */
/* SFmode: one fstw/ldw pair through the -16(%r30) scratch slot.  */
7171 if (arg_mode == SFmode)
7173 xoperands[0] = XEXP (use, 0);
7174 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7175 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7176 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* DFmode: fstd then two ldw's for the high/low word pair.  */
7180 xoperands[0] = XEXP (use, 0);
7181 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7182 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7183 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7184 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7190 /* Compute length of the FP argument copy sequence for INSN.
   Must agree with the instruction count emitted by copy_fp_args.  */
7192 length_fp_args (rtx insn)
7197 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7199 int arg_mode, regno;
7200 rtx use = XEXP (link, 0);
/* Same USE filtering as copy_fp_args.  */
7202 if (! (GET_CODE (use) == USE
7203 && GET_CODE (XEXP (use, 0)) == REG
7204 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7207 arg_mode = GET_MODE (XEXP (use, 0));
7208 regno = REGNO (XEXP (use, 0));
7210 /* Is it a floating point register? */
7211 if (regno >= 32 && regno <= 39)
/* SFmode copies take 2 insns; the (elided) DFmode path takes 3,
   matching the 2- and 3-insn sequences in copy_fp_args.  */
7213 if (arg_mode == SFmode)
7223 /* Return the attribute length for the millicode call instruction INSN.
7224    The length must match the code generated by output_millicode_call.
7225    We include the delay slot in the returned length as it is better to
7226    over estimate the length than to under estimate it. */
7229 attr_length_millicode_call (rtx insn)
7231   unsigned long distance = -1;
   /* Calls from a named section may be a long way from $CODE$, so only
      count the function's own code bytes when it is in a named section.  */
7232   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7234   if (INSN_ADDRESSES_SET_P ())
7236       distance = (total + insn_current_reference_address (insn));
      /* Unsigned wraparound means the distance estimate overflowed.  */
7237       if (distance < total)
   /* 7600000 is the conservative reach of the PA 2.0 pc-relative branch;
      240000 is the reach of the PA 1.x {bl|b,l} form.  */
7243   if (!TARGET_LONG_CALLS && distance < 7600000)
7248   else if (TARGET_PORTABLE_RUNTIME)
7252   if (!TARGET_LONG_CALLS && distance < 240000)
7255   if (TARGET_LONG_ABS_CALL && !flag_pic)
7262 /* INSN is a function call. It may have an unconditional jump
7265    CALL_DEST is the routine we are calling. */
/* Emits the assembly for a millicode call ($$mulI, $$divI, etc.).  The
   sequence chosen depends on target flags and the length attribute
   computed by attr_length_millicode_call; the two must stay in sync.
   Returns "" since all output goes through output_asm_insn.  */
7268 output_millicode_call (rtx insn, rtx call_dest)
7270   int attr_length = get_attr_length (insn);
7271   int seq_length = dbr_sequence_length ();
7276   xoperands[0] = call_dest;
   /* Millicode calls return through %r31 on 32-bit, %r2 on 64-bit.  */
7277   xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7279   /* Handle the common case where we are sure that the branch will
7280      reach the beginning of the $CODE$ subspace. The within reach
7281      form of the $$sh_func_adrs call has a length of 28. Because
7282      it has an attribute type of multi, it never has a nonzero
7283      sequence length. The length of the $$sh_func_adrs is the same
7284      as certain out of reach PIC calls to other routines. */
7285   if (!TARGET_LONG_CALLS
7286       && ((seq_length == 0
7287 	   && (attr_length == 12
7288 	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7289 	  || (seq_length != 0 && attr_length == 8)))
7291       output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7297 	  /* It might seem that one insn could be saved by accessing
7298 	     the millicode function using the linkage table. However,
7299 	     this doesn't work in shared libraries and other dynamically
7300 	     loaded objects. Using a pc-relative sequence also avoids
7301 	     problems related to the implicit use of the gp register. */
7302 	  output_asm_insn ("b,l .+8,%%r1", xoperands);
7306 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7307 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
          /* Non-GAS path: materialize the pc-relative offset via a local
             label instead of the $PIC_pcrel$0 pseudo-symbol.  */
7311 	      xoperands[1] = gen_label_rtx ();
7312 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7313 	      targetm.asm_out.internal_label (asm_out_file, "L",
7314 					      CODE_LABEL_NUMBER (xoperands[1]));
7315 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7318 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7320       else if (TARGET_PORTABLE_RUNTIME)
7322 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7323 	     have PIC support in the assembler/linker, so this sequence
7326 	  /* Get the address of our target into %r1. */
7327 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7328 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7330 	  /* Get our return address into %r31. */
7331 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7332 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7334 	  /* Jump to our target address in %r1. */
7335 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
       /* Long absolute (non-PIC) call: ldil/be,l or ldil/ble.  */
7339 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7341 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7343 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
       /* Long PIC call: compute the target pc-relatively into %r1,
          pre-setting the return address (%r31) past the sequence.  */
7347 	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7348 	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7350 	  if (TARGET_SOM || !TARGET_GAS)
7352 	      /* The HP assembler can generate relocations for the
7353 		 difference of two symbols. GAS can do this for a
7354 		 millicode symbol but not an arbitrary external
7355 		 symbol when generating SOM output. */
7356 	      xoperands[1] = gen_label_rtx ();
7357 	      targetm.asm_out.internal_label (asm_out_file, "L",
7358 					      CODE_LABEL_NUMBER (xoperands[1]));
7359 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7360 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7364 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7365 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7369 	  /* Jump to our target address in %r1. */
7370 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
   /* Long sequences leave the delay slot to the sequence itself; emit a
      nop when no delay insn was scheduled.  */
7374   if (seq_length == 0)
7375     output_asm_insn ("nop", xoperands);
7377   /* We are done if there isn't a jump in the delay slot. */
7378   if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7381   /* This call has an unconditional jump in its delay slot. */
7382   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7384   /* See if the return address can be adjusted. Use the containing
7385      sequence insn's address. */
7386   if (INSN_ADDRESSES_SET_P ())
7388       seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7389       distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7390 		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
      /* If the jump target is within ldo's 14-bit displacement, fold the
         delay-slot jump into a return-address adjustment.  */
7392       if (VAL_14_BITS_P (distance))
7394 	  xoperands[1] = gen_label_rtx ();
7395 	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7396 	  targetm.asm_out.internal_label (asm_out_file, "L",
7397 					  CODE_LABEL_NUMBER (xoperands[1]));
7400 	/* ??? This branch may not reach its target. */
7401 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7404     /* ??? This branch may not reach its target. */
7405     output_asm_insn ("nop\n\tb,n %0", xoperands);
7407   /* Delete the jump. */
7408   SET_INSN_DELETED (NEXT_INSN (insn));
7413 /* Return the attribute length of the call instruction INSN. The SIBCALL
7414    flag indicates whether INSN is a regular call or a sibling call. The
7415    length returned must be longer than the code actually generated by
7416    output_call. Since branch shortening is done before delay branch
7417    sequencing, there is no way to determine whether or not the delay
7418    slot will be filled during branch shortening. Even when the delay
7419    slot is filled, we may have to add a nop if the delay slot contains
7420    a branch that can't reach its target. Thus, we always have to include
7421    the delay slot in the length estimate. This used to be done in
7422    pa_adjust_insn_length but we do it here now as some sequences always
7423    fill the delay slot and we can save four bytes in the estimate for
7427 attr_length_call (rtx insn, int sibcall)
7430   rtx call, call_dest;
7433   rtx pat = PATTERN (insn);
7434   unsigned long distance = -1;
7436   gcc_assert (GET_CODE (insn) == CALL_INSN);
7438   if (INSN_ADDRESSES_SET_P ())
7440       unsigned long total;
      /* See attr_length_millicode_call: named sections only count their
         own code bytes; wraparound means the estimate overflowed.  */
7442       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7443       distance = (total + insn_current_reference_address (insn));
7444       if (distance < total)
7448   gcc_assert (GET_CODE (pat) == PARALLEL);
7450   /* Get the call rtx. */
7451   call = XVECEXP (pat, 0, 0);
7452   if (GET_CODE (call) == SET)
7453     call = SET_SRC (call);
7455   gcc_assert (GET_CODE (call) == CALL);
7457   /* Determine if this is a local call. */
7458   call_dest = XEXP (XEXP (call, 0), 0);
7459   call_decl = SYMBOL_REF_DECL (call_dest);
7460   local_call = call_decl && targetm.binds_local_p (call_decl);
7462   /* pc-relative branch. */
7463   if (!TARGET_LONG_CALLS
7464       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7465 	  || distance < 240000))
7468   /* 64-bit plabel sequence. */
7469   else if (TARGET_64BIT && !local_call)
7470     length += sibcall ? 28 : 24;
7472   /* non-pic long absolute branch sequence. */
7473   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7476   /* long pc-relative branch sequence. */
7477   else if (TARGET_LONG_PIC_SDIFF_CALL
7478 	   || (TARGET_GAS && !TARGET_SOM
7479 	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
      /* Extra bytes for the ldsid/mtsp space-register fixup.  */
7483       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && flag_pic)
7487   /* 32-bit plabel sequence. */
      /* Indirect calls must copy FP args to GRs; include that cost.  */
7493 	length += length_fp_args (insn);
7503       if (!TARGET_NO_SPACE_REGS && flag_pic)
7511 /* INSN is a function call. It may have an unconditional jump
7514    CALL_DEST is the routine we are calling. */
/* Emits assembly for a regular or sibling call.  Chooses among the short
   pc-relative form, the 64-bit plabel form, long absolute, long
   pc-relative, and the inline-$$dyncall plabel form; the sequence
   lengths must agree with attr_length_call.  Also folds or deletes any
   unconditional jump sitting in the call's delay slot.  */
7517 output_call (rtx insn, rtx call_dest, int sibcall)
7519   int delay_insn_deleted = 0;
7520   int delay_slot_filled = 0;
7521   int seq_length = dbr_sequence_length ();
7522   tree call_decl = SYMBOL_REF_DECL (call_dest);
7523   int local_call = call_decl && targetm.binds_local_p (call_decl);
7526   xoperands[0] = call_dest;
7528   /* Handle the common case where we're sure that the branch will reach
7529      the beginning of the "$CODE$" subspace. This is the beginning of
7530      the current function if we are in a named section. */
7531   if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
      /* Sibcalls link through %r0 (i.e., no return address saved).  */
7533       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7534       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7538       if (TARGET_64BIT && !local_call)
7540 	  /* ??? As far as I can tell, the HP linker doesn't support the
7541 	     long pc-relative sequence described in the 64-bit runtime
7542 	     architecture. So, we use a slightly longer indirect call. */
7543 	  xoperands[0] = get_deferred_plabel (call_dest);
7544 	  xoperands[1] = gen_label_rtx ();
7546 	  /* If this isn't a sibcall, we put the load of %r27 into the
7547 	     delay slot. We can't do this in a sibcall as we don't
7548 	     have a second call-clobbered scratch register available. */
7550 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
          /* Emit the delay insn now, ahead of the call sequence.  */
7553 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7556 	      /* Now delete the delay insn. */
7557 	      SET_INSN_DELETED (NEXT_INSN (insn));
7558 	      delay_insn_deleted = 1;
       /* Load the function descriptor address from the linkage table.  */
7561 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7562 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7563 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
          /* Sibcall: restore gp (%r27) before jumping, no link.  */
7567 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7568 	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7569 	      output_asm_insn ("bve (%%r1)", xoperands);
          /* Regular call: load gp for the callee in the delay slot.  */
7573 	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7574 	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7575 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7576 	      delay_slot_filled = 1;
7581 	  int indirect_call = 0;
7583 	  /* Emit a long call. There are several different sequences
7584 	     of increasing length and complexity. In most cases,
7585 	     they don't allow an instruction in the delay slot. */
7586 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7587 	      && !TARGET_LONG_PIC_SDIFF_CALL
7588 	      && !(TARGET_GAS && !TARGET_SOM
7589 		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7594 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7598 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7600 	      /* A non-jump insn in the delay slot. By definition we can
7601 		 emit this insn before the call (and in fact before argument
7603 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7606 	      /* Now delete the delay insn. */
7607 	      SET_INSN_DELETED (NEXT_INSN (insn));
7608 	      delay_insn_deleted = 1;
7611 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7613 	      /* This is the best sequence for making long calls in
7614 		 non-pic code. Unfortunately, GNU ld doesn't provide
7615 		 the stub needed for external calls, and GAS's support
7616 		 for this with the SOM linker is buggy. It is safe
7617 		 to use this for local calls. */
7618 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7620 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7624 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7627 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
              /* Move the return address from %r31 to the standard %r2.  */
7629 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7630 		  delay_slot_filled = 1;
7635 	      if (TARGET_LONG_PIC_SDIFF_CALL)
7637 		  /* The HP assembler and linker can handle relocations
7638 		     for the difference of two symbols. The HP assembler
7639 		     recognizes the sequence as a pc-relative call and
7640 		     the linker provides stubs when needed. */
7641 		  xoperands[1] = gen_label_rtx ();
7642 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7643 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7644 		  targetm.asm_out.internal_label (asm_out_file, "L",
7645 						  CODE_LABEL_NUMBER (xoperands[1]));
7646 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7648 	      else if (TARGET_GAS && !TARGET_SOM
7649 		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7651 		  /*  GAS currently can't generate the relocations that
7652 		      are needed for the SOM linker under HP-UX using this
7653 		      sequence. The GNU linker doesn't generate the stubs
7654 		      that are needed for external calls on TARGET_ELF32
7655 		      with this sequence. For now, we have to use a
7656 		      longer plabel sequence when using GAS. */
7657 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7658 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7660 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7665 		  /* Emit a long plabel-based call sequence. This is
7666 		     essentially an inline implementation of $$dyncall.
7667 		     We don't actually try to call $$dyncall as this is
7668 		     as difficult as calling the function itself. */
7669 		  xoperands[0] = get_deferred_plabel (call_dest);
7670 		  xoperands[1] = gen_label_rtx ();
7672 		  /* Since the call is indirect, FP arguments in registers
7673 		     need to be copied to the general registers. Then, the
7674 		     argument relocation stub will copy them back. */
7676 		    copy_fp_args (insn);
               /* PIC: fetch the plabel through the DLT (%r19).  */
7680 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
7681 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7682 		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
               /* Non-PIC: address the plabel relative to $global$.  */
7686 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
7688 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
           /* Inline $$dyncall: test the plabel bit (bit 30) and, when
              set, dereference the descriptor to get gp and target.  */
7692 		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7693 		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7694 		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7695 		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7697 		  if (!sibcall && !TARGET_PA_20)
               /* Materialize the return address in %r2 by hand.  */
7699 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7700 		      if (TARGET_NO_SPACE_REGS)
7701 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7703 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7710 		    output_asm_insn ("bve (%%r1)", xoperands);
               /* Save %r2 at the frame marker slot in the delay slot;
                  -24(%sp) is the standard RP save slot.  */
7715 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7716 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7717 			  delay_slot_filled = 1;
7720 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
           /* Pre-PA 2.0: may need to switch space registers first.  */
7725 		      if (!TARGET_NO_SPACE_REGS && flag_pic)
7726 			output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7731 			  if (TARGET_NO_SPACE_REGS || !flag_pic)
7732 			    output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7734 			    output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7738 			  if (TARGET_NO_SPACE_REGS || !flag_pic)
7739 			    output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7741 			    output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7744 			    output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7746 			    output_asm_insn ("copy %%r31,%%r2", xoperands);
7747 			  delay_slot_filled = 1;
7754   if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7755     output_asm_insn ("nop", xoperands);
7757   /* We are done if there isn't a jump in the delay slot. */
7759       || delay_insn_deleted
7760       || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7763   /* A sibcall should never have a branch in the delay slot. */
7764   gcc_assert (!sibcall);
7766   /* This call has an unconditional jump in its delay slot. */
7767   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7769   if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7771       /* See if the return address can be adjusted. Use the containing
7772 	 sequence insn's address. This would break the regular call/return@
7773 	 relationship assumed by the table based eh unwinder, so only do that
7774 	 if the call is not possibly throwing. */
7775       rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7776       int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7777 		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7779       if (VAL_14_BITS_P (distance)
7780 	  && !(can_throw_internal (insn) || can_throw_external (insn)))
          /* Fold the delay-slot jump into an adjustment of %r2.  */
7782 	  xoperands[1] = gen_label_rtx ();
7783 	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7784 	  targetm.asm_out.internal_label (asm_out_file, "L",
7785 					  CODE_LABEL_NUMBER (xoperands[1]));
7788 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7791     output_asm_insn ("b,n %0", xoperands);
7793   /* Delete the jump. */
7794   SET_INSN_DELETED (NEXT_INSN (insn));
7799 /* Return the attribute length of the indirect call instruction INSN.
7800    The length must match the code generated by output_indirect call.
7801    The returned length includes the delay slot. Currently, the delay
7802    slot of an indirect call sequence is not exposed and it is used by
7803    the sequence itself. */
7806 attr_length_indirect_call (rtx insn)
7808   unsigned long distance = -1;
   /* Same distance estimate as attr_length_millicode_call.  */
7809   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7811   if (INSN_ADDRESSES_SET_P ())
7813       distance = (total + insn_current_reference_address (insn));
      /* Unsigned wraparound means the estimate overflowed.  */
7814       if (distance < total)
   /* Short form: $$dyncall is reachable directly or fast indirect calls
      (no $$dyncall at all) are enabled.  */
7821   if (TARGET_FAST_INDIRECT_CALLS
7822       || (!TARGET_PORTABLE_RUNTIME
7823 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7824 	      || distance < 240000)))
7830   if (TARGET_PORTABLE_RUNTIME)
7833   /* Out of reach, can use ble. */
/* Emit the assembly for an indirect call through register %r22 (32-bit)
   or through a function descriptor (64-bit).  The sequence chosen must
   match the length computed by attr_length_indirect_call.  */
7838 output_indirect_call (rtx insn, rtx call_dest)
   /* 64-bit: CALL_DEST is a descriptor address; load target and gp.  */
7844       xoperands[0] = call_dest;
7845       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7846       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7850   /* First the special case for kernels, level 0 systems, etc. */
7851   if (TARGET_FAST_INDIRECT_CALLS)
7852     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7854   /* Now the normal case -- we can reach $$dyncall directly or
7855      we're sure that we can get there via a long-branch stub.
7857      No need to check target flags as the length uniquely identifies
7858      the remaining cases. */
7859   if (attr_length_indirect_call (insn) == 8)
7861       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7862 	 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7863 	 variant of the B,L instruction can't be used on the SOM target. */
7864       if (TARGET_PA_20 && !TARGET_SOM)
7865 	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7867 	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7870   /* Long millicode call, but we are not generating PIC or portable runtime
7872   if (attr_length_indirect_call (insn) == 12)
7873     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7875   /* Long millicode call for portable runtime. */
7876   if (attr_length_indirect_call (insn) == 20)
7877     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7879   /* We need a long PIC call to $$dyncall. */
7880   xoperands[0] = NULL_RTX;
7881   output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7882   if (TARGET_SOM || !TARGET_GAS)
      /* Non-GAS: compute the pc-relative offset via a local label.  */
7884       xoperands[0] = gen_label_rtx ();
7885       output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7886       targetm.asm_out.internal_label (asm_out_file, "L",
7887 				      CODE_LABEL_NUMBER (xoperands[0]));
7888       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7892       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7893       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
   /* blr sets the return address (%r2); bv,n jumps to $$dyncall.  */
7896   output_asm_insn ("blr %%r0,%%r2", xoperands);
7897   output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7901 /* Return the total length of the save and restore instructions needed for
7902    the data linkage table pointer (i.e., the PIC register) across the call
7903    instruction INSN. No-return calls do not require a save and restore.
7904    In addition, we may be able to avoid the save and restore for calls
7905    within the same translation unit. */
7908 attr_length_save_restore_dltp (rtx insn)
   /* A call that never returns needs no restore, hence no save either.  */
7910   if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7916 /* In HPUX 8.0's shared library scheme, special relocations are needed
7917    for function labels if they might be passed to a function
7918    in a shared library (because shared libraries don't live in code
7919    space), and special magic is needed to construct their address. */
/* Rewrites SYM's assembler name with the function-label marker prefix
   (see FUNCTION_NAME_P / pa_strip_name_encoding for the inverse).  */
7922 hppa_encode_label (rtx sym)
7924   const char *str = XSTR (sym, 0);
7925   int len = strlen (str) + 1;
   /* Build the prefixed copy on the stack, then intern it in GC memory.  */
7928   p = newstr = XALLOCAVEC (char, len + 1);
7932   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* Target hook TARGET_ENCODE_SECTION_INFO: mark text-space symbols and
   encode function labels (see hppa_encode_label), preserving any
   SYMBOL_FLAG_REFERENCED bit set before re-encoding.  */
7936 pa_encode_section_info (tree decl, rtx rtl, int first)
7938   int old_referenced = 0;
   /* Remember the referenced flag; default_encode_section_info resets
      the symbol flags.  */
7940   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
7942       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
7944   default_encode_section_info (decl, rtl, first);
7946   if (first && TEXT_SPACE_P (decl))
7948       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7949       if (TREE_CODE (decl) == FUNCTION_DECL)
7950 	hppa_encode_label (XEXP (rtl, 0));
7952   else if (old_referenced)
7953     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
7956 /* This is sort of inverse to pa_encode_section_info. */
/* Skip the '@' (function label) and '*' (user label) markers, if present,
   to recover the bare symbol name.  */
7959 pa_strip_name_encoding (const char *str)
7961   str += (*str == '@');
7962   str += (*str == '*');
/* Predicate: true iff OP is a SYMBOL_REF whose name carries the
   function-label encoding (see hppa_encode_label).  MODE is ignored.  */
7967 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7969   return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7972 /* Returns 1 if OP is a function label involved in a simple addition
7973    with a constant. Used to keep certain patterns from matching
7974    during instruction combination. */
7976 is_function_label_plus_const (rtx op)
7978   /* Strip off any CONST. */
7979   if (GET_CODE (op) == CONST)
   /* Match (plus (function-label symbol) (const_int ...)).  */
7982   return (GET_CODE (op) == PLUS
7983 	  && function_label_operand (XEXP (op, 0), Pmode)
7984 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
7987 /* Output assembly code for a thunk to FUNCTION. */
/* Target hook TARGET_ASM_OUTPUT_MI_THUNK: emits a this-adjusting thunk
   that adds DELTA to %r26 (the `this' argument register) and transfers
   to FUNCTION.  VCALL_OFFSET is unused on this port.  The branch form
   depends on reach, PIC, SOM vs. ELF, and section placement; nbytes
   tracks the emitted size so total_code_bytes stays accurate.  */
7990 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7991 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7994   static unsigned int current_thunk_number;
   /* val_14: DELTA fits an ldo immediate, so the add can go in a delay slot.  */
7995   int val_14 = VAL_14_BITS_P (delta);
7996   unsigned int old_last_address = last_address, nbytes = 0;
8000   xoperands[0] = XEXP (DECL_RTL (function), 0);
8001   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8002   xoperands[2] = GEN_INT (delta);
8004   ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8005   fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8007   /* Output the thunk. We know that the function is in the same
8008      translation unit (i.e., the same space) as the thunk, and that
8009      thunks are output after their method. Thus, we don't need an
8010      external branch to reach the function. With SOM and GAS,
8011      functions and thunks are effectively in different sections.
8012      Thus, we can always use a IA-relative branch and the linker
8013      will add a long branch stub if necessary.
8015      However, we have to be careful when generating PIC code on the
8016      SOM port to ensure that the sequence does not transfer to an
8017      import stub for the target function as this could clobber the
8018      return value saved at SP-24. This would also apply to the
8019      32-bit linux port if the multi-space model is implemented. */
8020   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8021        && !(flag_pic && TREE_PUBLIC (function))
8022        && (TARGET_GAS || last_address < 262132))
8023       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8024 	  && ((targetm.have_named_sections
8025 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8026 	       /* The GNU 64-bit linker has rather poor stub management.
8027 		  So, we use a long branch from thunks that aren't in
8028 		  the same section as the target function. */
8030 		    && (DECL_SECTION_NAME (thunk_fndecl)
8031 			!= DECL_SECTION_NAME (function)))
8032 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8033 			== DECL_SECTION_NAME (function))
8034 		       && last_address < 262132)))
8035 	      || (targetm.have_named_sections
8036 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8037 		  && DECL_SECTION_NAME (function) == NULL
8038 		  && last_address < 262132)
8039 	      || (!targetm.have_named_sections && last_address < 262132))))
      /* Short reach: plain branch, delta add in or around the branch.  */
8042 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8044       output_asm_insn ("b %0", xoperands);
8048 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8053 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8057   else if (TARGET_64BIT)
8059       /* We only have one call-clobbered scratch register, so we can't
8060 	 make use of the delay slot if delta doesn't fit in 14 bits. */
8063 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8064 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8067       output_asm_insn ("b,l .+8,%%r1", xoperands);
       /* GAS pc-relative sequence using $PIC_pcrel$0.  */
8071 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8072 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
       /* Non-GAS: symbol-difference form; %3 skips the delta insns.  */
8076 	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8077 	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8082 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8083 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8088 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8092   else if (TARGET_PORTABLE_RUNTIME)
      /* Portable runtime: absolute address in %r22, no be/ble.  */
8094       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8095       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8098 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8100       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8104 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8109 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8113   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8115       /* The function is accessible from outside this module. The only
8116 	 way to avoid an import stub between the thunk and function is to
8117 	 call the function directly with an indirect sequence similar to
8118 	 that used by $$dyncall. This is possible because $$dyncall acts
8119 	 as the import stub in an indirect call. */
8120       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8121       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8122       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8123       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8124       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      /* Plabel-bit test and descriptor dereference, as in $$dyncall.  */
8125       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8126       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8127       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8128       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8132 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8138 	  output_asm_insn ("bve (%%r22)", xoperands);
8141       else if (TARGET_NO_SPACE_REGS)
8143 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
       /* With space registers, load and switch to the target's space.  */
8148 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8149 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8150 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8155 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8157 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
   /* Remaining 32-bit PIC case: pc-relative address of FUNCTION.  */
8161       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8163       if (TARGET_SOM || !TARGET_GAS)
8165 	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8166 	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8170 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8171 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8175 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8177       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8181 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8186 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
   /* Fallback: non-PIC long absolute branch via %r22.  */
8193 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8195       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8196       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8200 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8205 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8210   fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8212   if (TARGET_SOM && TARGET_GAS)
8214       /* We done with this subspace except possibly for some additional
8215 	 debug information. Forget that we are in this subspace to ensure
8216 	 that the next function is output in its own subspace. */
8218       cfun->machine->in_nsubspa = 2;
   /* SOM PIC: emit the thunk's plabel word in the data section.  */
8221   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8223       switch_to_section (data_section);
8224       output_asm_insn (".align 4", xoperands);
8225       ASM_OUTPUT_LABEL (file, label);
8226       output_asm_insn (".word P'%0", xoperands);
8229   current_thunk_number++;
   /* Round the size up to the function alignment boundary.  */
8230   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8231 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8232   last_address += nbytes;
   /* Saturate on wraparound so distance checks stay conservative.  */
8233   if (old_last_address > last_address)
8234     last_address = UINT_MAX;
8235   update_total_code_bytes (nbytes);
8238 /* Only direct calls to static functions are allowed to be sibling (tail)
8241    This restriction is necessary because some linker generated stubs will
8242    store return pointers into rp' in some cases which might clobber a
8243    live value already in rp'.
8245    In a sibcall the current function and the target function share stack
8246    space. Thus if the path to the current function and the path to the
8247    target function save a value in rp', they save the value into the
8248    same stack slot, which has undesirable consequences.
8250    Because of the deferred binding nature of shared libraries any function
8251    with external scope could be in a different load module and thus require
8252    rp' to be saved when calling that function. So sibcall optimizations
8253    can only be safe for static function.
8255    Note that GCC never needs return value relocations, so we don't have to
8256    worry about static calls with return value relocations (which require
8259    It is safe to perform a sibcall optimization when the target function
8260    will never return. */
/* Target hook TARGET_FUNCTION_OK_FOR_SIBCALL.  DECL is the callee's
   FUNCTION_DECL or NULL for an indirect call; EXP is unused.  */
8262 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8264   if (TARGET_PORTABLE_RUNTIME)
8267   /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8268      single subspace mode and the call is not indirect. As far as I know,
8269      there is no operating system support for the multiple subspace mode.
8270      It might be possible to support indirect calls if we didn't use
8271      $$dyncall (see the indirect sequence generated in output_call). */
8273     return (decl != NULL_TREE);
8275   /* Sibcalls are not ok because the arg pointer register is not a fixed
8276      register. This prevents the sibcall optimization from occurring. In
8277      addition, there are problems with stub placement using GNU ld. This
8278      is because a normal sibcall branch uses a 17-bit relocation while
8279      a regular call branch uses a 22-bit relocation. As a result, more
8280      care needs to be taken in the placement of long-branch stubs. */
8284   /* Sibcalls are only ok within a translation unit. */
8285   return (decl && !TREE_PUBLIC (decl));
8288 /* ??? Addition is not commutative on the PA due to the weird implicit
8289    space register selection rules for memory addresses. Therefore, we
8290    don't consider a + b == b + a, as this might be inside a MEM. */
/* Target hook TARGET_COMMUTATIVE_P: treat PLUS as non-commutative unless
   space registers are disabled or X cannot be an address (OUTER_CODE is
   known and is not MEM).  */
8292 pa_commutative_p (const_rtx x, int outer_code)
8294   return (COMMUTATIVE_P (x)
8295 	  && (TARGET_NO_SPACE_REGS
8296 	      || (outer_code != UNKNOWN && outer_code != MEM)
8297 	      || GET_CODE (x) != PLUS));
8300 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8301    use in fmpyadd instructions. */
/* Checks the multiply operands (0..2) and add operands (3..5) for the
   combined fused multiply/add pattern: one FP mode throughout, register
   operands only, no conflicts between the two halves, and the SFmode
   upper-register-bank restriction.  */
8303 fmpyaddoperands (rtx *operands)
8305   enum machine_mode mode = GET_MODE (operands[0]);
8307   /* Must be a floating point mode. */
8308   if (mode != SFmode && mode != DFmode)
8311   /* All modes must be the same. */
8312   if (! (mode == GET_MODE (operands[1])
8313 	 && mode == GET_MODE (operands[2])
8314 	 && mode == GET_MODE (operands[3])
8315 	 && mode == GET_MODE (operands[4])
8316 	 && mode == GET_MODE (operands[5])))
8319   /* All operands must be registers. */
8320   if (! (GET_CODE (operands[1]) == REG
8321 	 && GET_CODE (operands[2]) == REG
8322 	 && GET_CODE (operands[3]) == REG
8323 	 && GET_CODE (operands[4]) == REG
8324 	 && GET_CODE (operands[5]) == REG))
8327   /* Only 2 real operands to the addition. One of the input operands must
8328      be the same as the output operand. */
8329   if (! rtx_equal_p (operands[3], operands[4])
8330       && ! rtx_equal_p (operands[3], operands[5]))
8333   /* Inout operand of add cannot conflict with any operands from multiply. */
8334   if (rtx_equal_p (operands[3], operands[0])
8335      || rtx_equal_p (operands[3], operands[1])
8336      || rtx_equal_p (operands[3], operands[2]))
8339   /* multiply cannot feed into addition operands. */
8340   if (rtx_equal_p (operands[4], operands[0])
8341       || rtx_equal_p (operands[5], operands[0]))
8344   /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8346       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8347 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8348 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8349 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8350 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8351 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8354   /* Passed. Operands are suitable for fmpyadd. */
8358 #if !defined(USE_COLLECT2)
/* Emit a constructor-table entry for SYMBOL at PRIORITY (used when
   collect2 is not doing ctor collection).  SYMBOL is first encoded as a
   function label, then the entry is emitted via whichever default hook
   the configuration provides (.ctors section, named section, or stabs).  */
8360 pa_asm_out_constructor (rtx symbol, int priority)
8362 if (!function_label_operand (symbol, VOIDmode))
8363 hppa_encode_label (symbol);
8365 #ifdef CTORS_SECTION_ASM_OP
8366 default_ctor_section_asm_out_constructor (symbol, priority);
8368 # ifdef TARGET_ASM_NAMED_SECTION
8369 default_named_section_asm_out_constructor (symbol, priority);
8371 default_stabs_asm_out_constructor (symbol, priority);
/* Emit a destructor-table entry for SYMBOL at PRIORITY; mirror image of
   pa_asm_out_constructor above, dispatching on the same configuration
   macros for the .dtors section, a named section, or stabs.  */
8377 pa_asm_out_destructor (rtx symbol, int priority)
8379 if (!function_label_operand (symbol, VOIDmode))
8380 hppa_encode_label (symbol);
8382 #ifdef DTORS_SECTION_ASM_OP
8383 default_dtor_section_asm_out_destructor (symbol, priority);
8385 # ifdef TARGET_ASM_NAMED_SECTION
8386 default_named_section_asm_out_destructor (symbol, priority);
8388 default_stabs_asm_out_destructor (symbol, priority);
8394 /* This function places uninitialized global data in the bss section.
8395 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8396 function on the SOM port to prevent uninitialized global data from
8397 being placed in the data section. */
/* STREAM is the assembler output file; SIZE is in bytes and ALIGN in
   bits (hence the division by BITS_PER_UNIT for the .align directive).  */
8400 pa_asm_output_aligned_bss (FILE *stream,
8402 unsigned HOST_WIDE_INT size,
8405 switch_to_section (bss_section);
8406 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8408 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8409 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8412 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8413 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8416 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8417 ASM_OUTPUT_LABEL (stream, name);
/* .block reserves SIZE bytes of uninitialized storage after the label.  */
8418 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8421 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8422 that doesn't allow the alignment of global common storage to be directly
8423 specified. The SOM linker aligns common storage based on the rounded
8424 value of the NUM_BYTES parameter in the .comm directive. It's not
8425 possible to use the .align directive as it doesn't affect the alignment
8426 of the label associated with a .comm directive. */
/* Emit global common storage NAME of SIZE bytes with requested ALIGN
   (in bits).  Alignment is capped at the maximum the SOM linker can
   honor, then encoded indirectly by rounding the .comm size up.  */
8429 pa_asm_output_aligned_common (FILE *stream,
8431 unsigned HOST_WIDE_INT size,
8434 unsigned int max_common_align;
/* Maximum achievable alignment in bits: 128 on 64-bit; on 32-bit it
   depends on the object size (larger objects can get 256-bit alignment).  */
8436 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8437 if (align > max_common_align)
8439 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8440 "for global common data. Using %u",
8441 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8442 align = max_common_align;
8445 switch_to_section (bss_section);
8447 assemble_name (stream, name);
/* The emitted size is rounded up to the alignment so the linker's
   size-based rounding produces the desired alignment.  */
8448 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8449 MAX (size, align / BITS_PER_UNIT));
8452 /* We can't use .comm for local common storage as the SOM linker effectively
8453 treats the symbol as universal and uses the same storage for local symbols
8454 with the same name in different object files. The .block directive
8455 reserves an uninitialized block of storage. However, it's not common
8456 storage. Fortunately, GCC never requests common storage with the same
8457 name in any given translation unit. */
/* Emit local (file-scope) uninitialized storage NAME of SIZE bytes,
   aligned to ALIGN bits, using a label plus .block in the bss section.  */
8460 pa_asm_output_aligned_local (FILE *stream,
8462 unsigned HOST_WIDE_INT size,
8465 switch_to_section (bss_section);
8466 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8469 fprintf (stream, "%s", LOCAL_ASM_OP);
8470 assemble_name (stream, name);
8471 fprintf (stream, "\n");
8474 ASM_OUTPUT_LABEL (stream, name);
8475 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8478 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8479 use in fmpysub instructions. */
/* Same operand layout as fmpyaddoperands: operands[0..2] are the
   multiply, operands[3..5] the subtraction.  The extra constraint here
   is that subtraction is not commutative, so the inout operand must be
   the minuend (operands[4]).  */
8481 fmpysuboperands (rtx *operands)
8483 enum machine_mode mode = GET_MODE (operands[0]);
8485 /* Must be a floating point mode. */
8486 if (mode != SFmode && mode != DFmode)
8489 /* All modes must be the same. */
8490 if (! (mode == GET_MODE (operands[1])
8491 && mode == GET_MODE (operands[2])
8492 && mode == GET_MODE (operands[3])
8493 && mode == GET_MODE (operands[4])
8494 && mode == GET_MODE (operands[5])))
8497 /* All operands must be registers. */
8498 if (! (GET_CODE (operands[1]) == REG
8499 && GET_CODE (operands[2]) == REG
8500 && GET_CODE (operands[3]) == REG
8501 && GET_CODE (operands[4]) == REG
8502 && GET_CODE (operands[5]) == REG))
8505 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8506 operation, so operands[4] must be the same as operand[3]. */
8507 if (! rtx_equal_p (operands[3], operands[4]))
8510 /* multiply cannot feed into subtraction. */
8511 if (rtx_equal_p (operands[5], operands[0]))
8514 /* Inout operand of sub cannot conflict with any operands from multiply. */
8515 if (rtx_equal_p (operands[3], operands[0])
8516 || rtx_equal_p (operands[3], operands[1])
8517 || rtx_equal_p (operands[3], operands[2]))
8520 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8522 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8523 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8524 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8525 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8526 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8527 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8530 /* Passed. Operands are suitable for fmpysub. */
8534 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8535 constants for shadd instructions. */
/* shadd scales one addend by 2, 4 or 8 (shift-and-add), hence the set.  */
8537 shadd_constant_p (int val)
8539 if (val == 2 || val == 4 || val == 8)
8545 /* Return 1 if OP is valid as a base or index register in a
/* ...base-or-index (borx) addressing context for MODE.  Rejects
   non-REGs, virtual registers (only REG and REG+CONST can be
   instantiated for them), and a soon-to-be-eliminated frame pointer.  */
8549 borx_reg_operand (rtx op, enum machine_mode mode)
8551 if (GET_CODE (op) != REG)
8554 /* We must reject virtual registers as the only expressions that
8555 can be instantiated are REG and REG+CONST. */
8556 if (op == virtual_incoming_args_rtx
8557 || op == virtual_stack_vars_rtx
8558 || op == virtual_stack_dynamic_rtx
8559 || op == virtual_outgoing_args_rtx
8560 || op == virtual_cfa_rtx)
8563 /* While it's always safe to index off the frame pointer, it's not
8564 profitable to do so when the frame pointer is being eliminated. */
8565 if (!reload_completed
8566 && flag_omit_frame_pointer
8567 && !cfun->calls_alloca
8568 && op == frame_pointer_rtx)
8571 return register_operand (op, mode);
8574 /* Return 1 if this operand is anything other than a hard register. */
8577 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8579 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8582 /* Return TRUE if INSN branches forward. */
8585 forward_branch_p (rtx insn)
8587 rtx lab = JUMP_LABEL (insn);
8589 /* The INSN must have a jump label. */
8590 gcc_assert (lab != NULL_RTX);
/* When insn addresses have been computed, compare addresses directly.  */
8592 if (INSN_ADDRESSES_SET_P ())
8593 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
/* NOTE(review): otherwise walk forward from INSN looking for LAB —
   the scan loop surrounding this advance appears truncated in this
   listing; confirm against the full source.  */
8600 insn = NEXT_INSN (insn);
8606 /* Return 1 if OP is an equality comparison, else return 0. */
8608 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8610 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8613 /* Return 1 if INSN is in the delay slot of a call instruction. */
8615 jump_in_call_delay (rtx insn)
8618 if (GET_CODE (insn) != JUMP_INSN)
/* A filled call delay slot shows up as a SEQUENCE two insns back;
   element 1 of the SEQUENCE is the delay-slot insn itself.  */
8621 if (PREV_INSN (insn)
8622 && PREV_INSN (PREV_INSN (insn))
8623 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8625 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8627 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8628 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8635 /* Output an unconditional move and branch insn. */
/* OPERANDS: %0 = destination reg, %1 = source (reg or small immediate),
   %2 = branch target.  The template chosen depends on the insn's
   computed length and whether the delay slot was filled.  */
8638 output_parallel_movb (rtx *operands, rtx insn)
8640 int length = get_attr_length (insn);
8642 /* These are the cases in which we win. */
/* movb/mov-immediate-and-branch with the "true" (always) condition.  */
8644 return "mov%I1b,tr %1,%0,%2";
8646 /* None of the following cases win, but they don't lose either. */
8649 if (dbr_sequence_length () == 0)
8651 /* Nothing in the delay slot, fake it by putting the combined
8652 insn (the copy or add) in the delay slot of a bl. */
8653 if (GET_CODE (operands[1]) == CONST_INT)
8654 return "b %2\n\tldi %1,%0";
8656 return "b %2\n\tcopy %1,%0";
8660 /* Something in the delay slot, but we've got a long branch. */
8661 if (GET_CODE (operands[1]) == CONST_INT)
8662 return "ldi %1,%0\n\tb %2";
8664 return "copy %1,%0\n\tb %2";
/* Very long branch: emit the move, then a full long-branch sequence.  */
8668 if (GET_CODE (operands[1]) == CONST_INT)
8669 output_asm_insn ("ldi %1,%0", operands);
8671 output_asm_insn ("copy %1,%0", operands);
8672 return output_lbranch (operands[2], insn, 1);
8675 /* Output an unconditional add and branch insn. */
/* OPERANDS: %0 = inout register, %1 = read-only addend (reg or small
   immediate), %3 = branch target.  Parallel structure to
   output_parallel_movb above.  */
8678 output_parallel_addb (rtx *operands, rtx insn)
8680 int length = get_attr_length (insn);
8682 /* To make life easy we want operand0 to be the shared input/output
8683 operand and operand1 to be the readonly operand. */
8684 if (operands[0] == operands[1])
8685 operands[1] = operands[2];
8687 /* These are the cases in which we win. */
/* addb with the "true" (always) condition does add + branch in one insn.  */
8689 return "add%I1b,tr %1,%0,%3";
8691 /* None of the following cases win, but they don't lose either. */
8694 if (dbr_sequence_length () == 0)
8695 /* Nothing in the delay slot, fake it by putting the combined
8696 insn (the copy or add) in the delay slot of a bl. */
8697 return "b %3\n\tadd%I1 %1,%0,%0";
8699 /* Something in the delay slot, but we've got a long branch. */
8700 return "add%I1 %1,%0,%0\n\tb %3";
/* Very long branch: emit the add, then a full long-branch sequence.  */
8703 output_asm_insn ("add%I1 %1,%0,%0", operands);
8704 return output_lbranch (operands[3], insn, 1);
8707 /* Return nonzero if INSN (a jump insn) immediately follows a call
8708 to a named function. This is used to avoid filling the delay slot
8709 of the jump since it can usually be eliminated by modifying RP in
8710 the delay slot of the call. */
8713 following_call (rtx insn)
/* Only relevant when jumps may live in delay slots at all.  */
8715 if (! TARGET_JUMP_IN_DELAY)
8718 /* Find the previous real insn, skipping NOTEs. */
8719 insn = PREV_INSN (insn);
8720 while (insn && GET_CODE (insn) == NOTE)
8721 insn = PREV_INSN (insn);
8723 /* Check for CALL_INSNs and millicode calls. */
/* Millicode calls are plain INSNs with type TYPE_MILLI; dyncalls are
   excluded because their return pointer handling differs.  */
8725 && ((GET_CODE (insn) == CALL_INSN
8726 && get_attr_type (insn) != TYPE_DYNCALL)
8727 || (GET_CODE (insn) == INSN
8728 && GET_CODE (PATTERN (insn)) != SEQUENCE
8729 && GET_CODE (PATTERN (insn)) != USE
8730 && GET_CODE (PATTERN (insn)) != CLOBBER
8731 && get_attr_type (insn) == TYPE_MILLI)))
8737 /* We use this hook to perform a PA specific optimization which is difficult
8738 to do in earlier passes.
8740 We want the delay slots of branches within jump tables to be filled.
8741 None of the compiler passes at the moment even has the notion that a
8742 PA jump table doesn't contain addresses, but instead contains actual
8745 Because we actually jump into the table, the addresses of each entry
8746 must stay constant in relation to the beginning of the table (which
8747 itself must stay constant relative to the instruction to jump into
8748 it). I don't believe we can guarantee earlier passes of the compiler
8749 will adhere to those rules.
8751 So, late in the compilation process we find all the jump tables, and
8752 expand them into real code -- e.g. each entry in the jump table vector
8753 will get an appropriate label followed by a jump to the final target.
8755 Reorg and the final jump pass can then optimize these branches and
8756 fill their delay slots. We end up with smaller, more efficient code.
8758 The jump instructions within the table are special; we must be able
8759 to identify them during assembly output (if the jumps don't get filled
8760 we need to emit a nop rather than nullifying the delay slot)). We
8761 identify jumps in switch tables by using insns with the attribute
8762 type TYPE_BTABLE_BRANCH.
8764 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8765 insns. This serves two purposes, first it prevents jump.c from
8766 noticing that the last N entries in the table jump to the instruction
8767 immediately after the table and deleting the jumps. Second, those
8768 insns mark where we should emit .begin_brtab and .end_brtab directives
8769 when using GAS (allows for better link time optimizations). */
/* NOTE(review): the function signature line (presumably the
   machine-dependent reorg hook, pa_reorg) is missing from this listing;
   what follows is its body.  Confirm against the full source.  */
8776 remove_useless_addtr_insns (1);
/* The two-insn combination pass only pays off before PA8000 (see the
   pa_combine_instructions commentary below).  */
8778 if (pa_cpu < PROCESSOR_8000)
8779 pa_combine_instructions ();
8782 /* This is fairly cheap, so always run it if optimizing. */
8783 if (optimize > 0 && !TARGET_BIG_SWITCH)
8785 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8786 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8788 rtx pattern, tmp, location, label;
8789 unsigned int length, i;
8791 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8792 if (GET_CODE (insn) != JUMP_INSN
8793 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8794 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8797 /* Emit marker for the beginning of the branch table. */
8798 emit_insn_before (gen_begin_brtab (), insn);
8800 pattern = PATTERN (insn);
8801 location = PREV_INSN (insn);
/* ADDR_DIFF_VEC keeps its labels in vector element 1, ADDR_VEC in
   element 0 -- hence the code-dependent XVECLEN/XVECEXP indices.  */
8802 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8804 for (i = 0; i < length; i++)
8806 /* Emit a label before each jump to keep jump.c from
8807 removing this code. */
8808 tmp = gen_label_rtx ();
8809 LABEL_NUSES (tmp) = 1;
8810 emit_label_after (tmp, location);
8811 location = NEXT_INSN (location);
8813 if (GET_CODE (pattern) == ADDR_VEC)
8814 label = XEXP (XVECEXP (pattern, 0, i), 0);
8816 label = XEXP (XVECEXP (pattern, 1, i), 0);
8818 tmp = gen_short_jump (label);
8820 /* Emit the jump itself. */
8821 tmp = emit_jump_insn_after (tmp, location);
8822 JUMP_LABEL (tmp) = label;
8823 LABEL_NUSES (label)++;
8824 location = NEXT_INSN (location);
8826 /* Emit a BARRIER after the jump. */
8827 emit_barrier_after (location);
8828 location = NEXT_INSN (location);
8831 /* Emit marker for the end of the branch table. */
8832 emit_insn_before (gen_end_brtab (), location);
8833 location = NEXT_INSN (location);
8834 emit_barrier_after (location);
8836 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8842 /* Still need brtab marker insns. FIXME: the presence of these
8843 markers disables output of the branch table to readonly memory,
8844 and any alignment directives that might be needed. Possibly,
8845 the begin_brtab insn should be output before the label for the
8846 table. This doesn't matter at the moment since the tables are
8847 always output in the text section. */
8848 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8850 /* Find an ADDR_VEC insn. */
8851 if (GET_CODE (insn) != JUMP_INSN
8852 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8853 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8856 /* Now generate markers for the beginning and end of the
8858 emit_insn_before (gen_begin_brtab (), insn);
8859 emit_insn_after (gen_end_brtab (), insn);
8864 /* The PA has a number of odd instructions which can perform multiple
8865 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8866 it may be profitable to combine two instructions into one instruction
8867 with two outputs. It's not profitable PA2.0 machines because the
8868 two outputs would take two slots in the reorder buffers.
8870 This routine finds instructions which can be combined and combines
8871 them. We only support some of the potential combinations, and we
8872 only try common ways to find suitable instructions.
8874 * addb can add two registers or a register and a small integer
8875 and jump to a nearby (+-8k) location. Normally the jump to the
8876 nearby location is conditional on the result of the add, but by
8877 using the "true" condition we can make the jump unconditional.
8878 Thus addb can perform two independent operations in one insn.
8880 * movb is similar to addb in that it can perform a reg->reg
8881 or small immediate->reg copy and jump to a nearby (+-8k location).
8883 * fmpyadd and fmpysub can perform a FP multiply and either an
8884 FP add or FP sub if the operands of the multiply and add/sub are
8885 independent (there are other minor restrictions). Note both
8886 the fmpy and fadd/fsub can in theory move to better spots according
8887 to data dependencies, but for now we require the fmpy stay at a
8890 * Many of the memory operations can perform pre & post updates
8891 of index registers. GCC's pre/post increment/decrement addressing
8892 is far too simple to take advantage of all the possibilities. This
8893 pass may not be suitable since those insns may not be independent.
8895 * comclr can compare two ints or an int and a register, nullify
8896 the following instruction and zero some other register. This
8897 is more difficult to use as it's harder to find an insn which
8898 will generate a comclr than finding something like an unconditional
8899 branch. (conditional moves & long branches create comclr insns).
8901 * Most arithmetic operations can conditionally skip the next
8902 instruction. They can be viewed as "perform this operation
8903 and conditionally jump to this nearby location" (where nearby
8904 is an insns away). These are difficult to use due to the
8905 branch length restrictions. */
8908 pa_combine_instructions (void)
8910 rtx anchor, new_rtx;
8912 /* This can get expensive since the basic algorithm is on the
8913 order of O(n^2) (or worse). Only do it for -O2 or higher
8914 levels of optimization. */
8918 /* Walk down the list of insns looking for "anchor" insns which
8919 may be combined with "floating" insns. As the name implies,
8920 "anchor" instructions don't move, while "floating" insns may
/* Scratch PARALLEL reused by pa_can_combine_p to test each candidate
   pair without allocating a new insn per attempt.  */
8922 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8923 new_rtx = make_insn_raw (new_rtx);
8925 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8927 enum attr_pa_combine_type anchor_attr;
8928 enum attr_pa_combine_type floater_attr;
8930 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8931 Also ignore any special USE insns. */
8932 if ((GET_CODE (anchor) != INSN
8933 && GET_CODE (anchor) != JUMP_INSN
8934 && GET_CODE (anchor) != CALL_INSN)
8935 || GET_CODE (PATTERN (anchor)) == USE
8936 || GET_CODE (PATTERN (anchor)) == CLOBBER
8937 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8938 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8941 anchor_attr = get_attr_pa_combine_type (anchor);
8942 /* See if anchor is an insn suitable for combination. */
8943 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8944 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8945 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8946 && ! forward_branch_p (anchor)))
/* First scan backwards from the anchor for a floater.  */
8950 for (floater = PREV_INSN (anchor);
8952 floater = PREV_INSN (floater))
8954 if (GET_CODE (floater) == NOTE
8955 || (GET_CODE (floater) == INSN
8956 && (GET_CODE (PATTERN (floater)) == USE
8957 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8960 /* Anything except a regular INSN will stop our search. */
8961 if (GET_CODE (floater) != INSN
8962 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8963 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8969 /* See if FLOATER is suitable for combination with the
8971 floater_attr = get_attr_pa_combine_type (floater);
8972 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8973 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8974 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8975 && floater_attr == PA_COMBINE_TYPE_FMPY))
8977 /* If ANCHOR and FLOATER can be combined, then we're
8978 done with this pass. */
8979 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
8980 SET_DEST (PATTERN (floater)),
8981 XEXP (SET_SRC (PATTERN (floater)), 0),
8982 XEXP (SET_SRC (PATTERN (floater)), 1)))
8986 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8987 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
/* An addb candidate: the floater is an add (PLUS source);
   otherwise it is a plain move for movb.  */
8989 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8991 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
8992 SET_DEST (PATTERN (floater)),
8993 XEXP (SET_SRC (PATTERN (floater)), 0),
8994 XEXP (SET_SRC (PATTERN (floater)), 1)))
8999 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9000 SET_DEST (PATTERN (floater)),
9001 SET_SRC (PATTERN (floater)),
9002 SET_SRC (PATTERN (floater))))
9008 /* If we didn't find anything on the backwards scan try forwards. */
9010 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9011 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9013 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9015 if (GET_CODE (floater) == NOTE
9016 || (GET_CODE (floater) == INSN
9017 && (GET_CODE (PATTERN (floater)) == USE
9018 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9022 /* Anything except a regular INSN will stop our search. */
9023 if (GET_CODE (floater) != INSN
9024 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9025 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9031 /* See if FLOATER is suitable for combination with the
9033 floater_attr = get_attr_pa_combine_type (floater);
9034 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9035 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9036 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9037 && floater_attr == PA_COMBINE_TYPE_FMPY))
9039 /* If ANCHOR and FLOATER can be combined, then we're
9040 done with this pass. */
9041 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9042 SET_DEST (PATTERN (floater)),
9043 XEXP (SET_SRC (PATTERN (floater)),
9045 XEXP (SET_SRC (PATTERN (floater)),
9052 /* FLOATER will be nonzero if we found a suitable floating
9053 insn for combination with ANCHOR. */
9055 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9056 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9058 /* Emit the new instruction and delete the old anchor. */
9059 emit_insn_before (gen_rtx_PARALLEL
9061 gen_rtvec (2, PATTERN (anchor),
9062 PATTERN (floater))),
9065 SET_INSN_DELETED (anchor);
9067 /* Emit a special USE insn for FLOATER, then delete
9068 the floating insn. */
9069 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9070 delete_insn (floater);
9075 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9078 /* Emit the new_jump instruction and delete the old anchor. */
9080 = emit_jump_insn_before (gen_rtx_PARALLEL
9082 gen_rtvec (2, PATTERN (anchor),
9083 PATTERN (floater))),
9086 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9087 SET_INSN_DELETED (anchor);
9089 /* Emit a special USE insn for FLOATER, then delete
9090 the floating insn. */
9091 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9092 delete_insn (floater);
/* Test whether ANCHOR and FLOATER can be fused into one two-output
   insn.  NEW_RTX is a scratch PARALLEL insn whose two slots receive the
   candidate patterns; REVERSED selects the operand order; DEST/SRC1/SRC2
   are the floater's output and inputs, checked for interference over
   the insns between the pair.  */
9100 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9103 int insn_code_number;
9106 /* Create a PARALLEL with the patterns of ANCHOR and
9107 FLOATER, try to recognize it, then test constraints
9108 for the resulting pattern.
9110 If the pattern doesn't match or the constraints
9111 aren't met keep searching for a suitable floater
9113 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9114 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
/* Force re-recognition; a stale INSN_CODE would bypass recog.  */
9115 INSN_CODE (new_rtx) = -1;
9116 insn_code_number = recog_memoized (new_rtx);
9117 if (insn_code_number < 0
9118 || (extract_insn (new_rtx), ! constrain_operands (1)))
9132 /* There's up to three operands to consider. One
9133 output and two inputs.
9135 The output must not be used between FLOATER & ANCHOR
9136 exclusive. The inputs must not be set between
9137 FLOATER and ANCHOR exclusive. */
9139 if (reg_used_between_p (dest, start, end))
9142 if (reg_set_between_p (src1, start, end))
9145 if (reg_set_between_p (src2, start, end))
9148 /* If we get here, then everything is good. */
9152 /* Return nonzero if references for INSN are delayed.
9154 Millicode insns are actually function calls with some special
9155 constraints on arguments and register usage.
9157 Millicode calls always expect their arguments in the integer argument
9158 registers, and always return their result in %r29 (ret1). They
9159 are expected to clobber their arguments, %r1, %r29, and the return
9160 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9162 This function tells reorg that the references to arguments and
9163 millicode calls do not appear to happen until after the millicode call.
9164 This allows reorg to put insns which set the argument registers into the
9165 delay slot of the millicode call -- thus they act more like traditional
9168 Note we cannot consider side effects of the insn to be delayed because
9169 the branch and link insn will clobber the return pointer. If we happened
9170 to use the return pointer in the delay slot of the call, then we lose.
9172 get_attr_type will try to recognize the given insn, so make sure to
9173 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9176 insn_refs_are_delayed (rtx insn)
9178 return ((GET_CODE (insn) == INSN
9179 && GET_CODE (PATTERN (insn)) != SEQUENCE
9180 && GET_CODE (PATTERN (insn)) != USE
9181 && GET_CODE (PATTERN (insn)) != CLOBBER
9182 && get_attr_type (insn) == TYPE_MILLI));
9185 /* On the HP-PA the value is found in register(s) 28(-29), unless
9186 the mode is SF or DF. Then the value is returned in fr4 (32).
9188 This must perform the same promotions as PROMOTE_MODE, else
9189 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
9191 Small structures must be returned in a PARALLEL on PA64 in order
9192 to match the HP Compiler ABI. */
/* Return the rtx describing where a value of type VALTYPE is returned.  */
9195 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
9197 enum machine_mode valmode;
9199 if (AGGREGATE_TYPE_P (valtype)
9200 || TREE_CODE (valtype) == COMPLEX_TYPE
9201 || TREE_CODE (valtype) == VECTOR_TYPE)
9205 /* Aggregates with a size less than or equal to 128 bits are
9206 returned in GR 28(-29). They are left justified. The pad
9207 bits are undefined. Larger aggregates are returned in
/* One DImode piece per word: just r28, or r28 and r29.  */
9211 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9213 for (i = 0; i < ub; i++)
9215 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9216 gen_rtx_REG (DImode, 28 + i),
9221 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9223 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9225 /* Aggregates 5 to 8 bytes in size are returned in general
9226 registers r28-r29 in the same manner as other non
9227 floating-point objects. The data is right-justified and
9228 zero-extended to 64 bits. This is opposite to the normal
9229 justification used on big endian targets and requires
9230 special treatment. */
9231 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9232 gen_rtx_REG (DImode, 28), const0_rtx);
9233 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
/* Scalars: promote sub-word integers and pointers to word_mode,
   matching PROMOTE_MODE as required by the comment above.  */
9237 if ((INTEGRAL_TYPE_P (valtype)
9238 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9239 || POINTER_TYPE_P (valtype))
9240 valmode = word_mode;
9242 valmode = TYPE_MODE (valtype);
/* Floating-point scalars (except TFmode, and except under soft-float)
   come back in fr4 (register 32); everything else in r28.  */
9244 if (TREE_CODE (valtype) == REAL_TYPE
9245 && !AGGREGATE_TYPE_P (valtype)
9246 && TYPE_MODE (valtype) != TFmode
9247 && !TARGET_SOFT_FLOAT)
9248 return gen_rtx_REG (valmode, 32);
9250 return gen_rtx_REG (valmode, 28);
9253 /* Return the location of a parameter that is passed in a register or NULL
9254 if the parameter has any component that is passed in memory.
9256 This is new code and will be pushed to into the net sources after
9259 ??? We might want to restructure this so that it looks more like other
9262 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9263 int named ATTRIBUTE_UNUSED)
/* Eight 64-bit argument slots on PA64, four 32-bit slots on PA32.  */
9265 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9272 if (mode == VOIDmode)
9275 arg_size = FUNCTION_ARG_SIZE (mode, type);
9277 /* If this arg would be passed partially or totally on the stack, then
9278 this routine should return zero. pa_arg_partial_bytes will
9279 handle arguments which are split between regs and stack slots if
9280 the ABI mandates split arguments. */
9283 /* The 32-bit ABI does not split arguments. */
9284 if (cum->words + arg_size > max_arg_words)
/* 64-bit: multi-word args are aligned to an even slot first.  */
9290 alignment = cum->words & 1;
9291 if (cum->words + alignment >= max_arg_words)
9295 /* The 32bit ABIs and the 64bit ABIs are rather different,
9296 particularly in their handling of FP registers. We might
9297 be able to cleverly share code between them, but I'm not
9298 going to bother in the hope that splitting them up results
9299 in code that is more easily understood. */
9303 /* Advance the base registers to their current locations.
9305 Remember, gprs grow towards smaller register numbers while
9306 fprs grow to higher register numbers. Also remember that
9307 although FP regs are 32-bit addressable, we pretend that
9308 the registers are 64-bits wide. */
9309 gpr_reg_base = 26 - cum->words;
9310 fpr_reg_base = 32 + cum->words;
9312 /* Arguments wider than one word and small aggregates need special
9316 || (type && (AGGREGATE_TYPE_P (type)
9317 || TREE_CODE (type) == COMPLEX_TYPE
9318 || TREE_CODE (type) == VECTOR_TYPE)))
9320 /* Double-extended precision (80-bit), quad-precision (128-bit)
9321 and aggregates including complex numbers are aligned on
9322 128-bit boundaries. The first eight 64-bit argument slots
9323 are associated one-to-one, with general registers r26
9324 through r19, and also with floating-point registers fr4
9325 through fr11. Arguments larger than one word are always
9326 passed in general registers.
9328 Using a PARALLEL with a word mode register results in left
9329 justified data on a big-endian target. */
9332 int i, offset = 0, ub = arg_size;
9334 /* Align the base register. */
9335 gpr_reg_base -= alignment;
/* Clamp to the registers actually remaining.  */
9337 ub = MIN (ub, max_arg_words - cum->words - alignment);
9338 for (i = 0; i < ub; i++)
9340 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9341 gen_rtx_REG (DImode, gpr_reg_base),
9347 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9352 /* If the argument is larger than a word, then we know precisely
9353 which registers we must use. */
9367 /* Structures 5 to 8 bytes in size are passed in the general
9368 registers in the same manner as other non floating-point
9369 objects. The data is right-justified and zero-extended
9370 to 64 bits. This is opposite to the normal justification
9371 used on big endian targets and requires special treatment.
9372 We now define BLOCK_REG_PADDING to pad these objects.
9373 Aggregates, complex and vector types are passed in the same
9374 manner as structures. */
9376 || (type && (AGGREGATE_TYPE_P (type)
9377 || TREE_CODE (type) == COMPLEX_TYPE
9378 || TREE_CODE (type) == VECTOR_TYPE)))
9380 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9381 gen_rtx_REG (DImode, gpr_reg_base),
9383 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9388 /* We have a single word (32 bits). A simple computation
9389 will get us the register #s we need. */
9390 gpr_reg_base = 26 - cum->words;
/* 32-bit FP registers advance two per 32-bit word slot.  */
9391 fpr_reg_base = 32 + 2 * cum->words;
9395 /* Determine if the argument needs to be passed in both general and
9396 floating point registers. */
9397 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9398 /* If we are doing soft-float with portable runtime, then there
9399 is no need to worry about FP regs. */
9400 && !TARGET_SOFT_FLOAT
9401 /* The parameter must be some kind of scalar float, else we just
9402 pass it in integer registers. */
9403 && GET_MODE_CLASS (mode) == MODE_FLOAT
9404 /* The target function must not have a prototype. */
9405 && cum->nargs_prototype <= 0
9406 /* libcalls do not need to pass items in both FP and general
9408 && type != NULL_TREE
9409 /* All this hair applies to "outgoing" args only. This includes
9410 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9412 /* Also pass outgoing floating arguments in both registers in indirect
9413 calls with the 32 bit ABI and the HP assembler since there is no
9414 way to the specify argument locations in static functions. */
9419 && GET_MODE_CLASS (mode) == MODE_FLOAT))
/* Pass in both an FP and a general register via a PARALLEL.  */
9425 gen_rtx_EXPR_LIST (VOIDmode,
9426 gen_rtx_REG (mode, fpr_reg_base),
9428 gen_rtx_EXPR_LIST (VOIDmode,
9429 gen_rtx_REG (mode, gpr_reg_base),
9434 /* See if we should pass this parameter in a general register. */
9435 if (TARGET_SOFT_FLOAT
9436 /* Indirect calls in the normal 32bit ABI require all arguments
9437 to be passed in general registers. */
9438 || (!TARGET_PORTABLE_RUNTIME
9442 /* If the parameter is not a scalar floating-point parameter,
9443 then it belongs in GPRs. */
9444 || GET_MODE_CLASS (mode) != MODE_FLOAT
9445 /* Structure with single SFmode field belongs in GPR. */
9446 || (type && AGGREGATE_TYPE_P (type)))
9447 retval = gen_rtx_REG (mode, gpr_reg_base);
9449 retval = gen_rtx_REG (mode, fpr_reg_base);
9455 /* If this arg would be passed totally in registers or totally on the stack,
9456 then this routine should return zero. */
9459 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9460 tree type, bool named ATTRIBUTE_UNUSED)
9462 unsigned int max_arg_words = 8;
9463 unsigned int offset = 0;
9468 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9471 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9472 /* Arg fits fully into registers. */
9474 else if (cum->words + offset >= max_arg_words)
9475 /* Arg fully on the stack. */
9479 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9483 /* A get_unnamed_section callback for switching to the text section.
9485 This function is only used with SOM. Because we don't support
9486 named subspaces, we can only create a new subspace or switch back
9487 to the default text subspace. */
9490 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9492 gcc_assert (TARGET_SOM);
9495 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9497 /* We only want to emit a .nsubspa directive once at the
9498 start of the function. */
9499 cfun->machine->in_nsubspa = 1;
9501 /* Create a new subspace for the text. This provides
9502 better stub placement and one-only functions. */
9504 && DECL_ONE_ONLY (cfun->decl)
9505 && !DECL_WEAK (cfun->decl))
9507 output_section_asm_op ("\t.SPACE $TEXT$\n"
9508 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9509 "ACCESS=44,SORT=24,COMDAT");
9515 /* There isn't a current function or the body of the current
9516 function has been completed. So, we are changing to the
9517 text section to output debugging information. Thus, we
9518 need to forget that we are in the text section so that
9519 varasm.c will call us when text_section is selected again. */
9520 gcc_assert (!cfun || !cfun->machine
9521 || cfun->machine->in_nsubspa == 2);
9524 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9527 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  DATA is the
   assembler directive string stored with the unnamed section.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  output_section_asm_op (data);
}
9540 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9543 pa_som_asm_init_sections (void)
9546 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9548 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9549 is not being generated. */
9550 som_readonly_data_section
9551 = get_unnamed_section (0, output_section_asm_op,
9552 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9554 /* When secondary definitions are not supported, SOM makes readonly
9555 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9557 som_one_only_readonly_data_section
9558 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9560 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9561 "ACCESS=0x2c,SORT=16,COMDAT");
9564 /* When secondary definitions are not supported, SOM makes data one-only
9565 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9566 som_one_only_data_section
9567 = get_unnamed_section (SECTION_WRITE,
9568 som_output_comdat_data_section_asm_op,
9569 "\t.SPACE $PRIVATE$\n"
9570 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9571 "ACCESS=31,SORT=24,COMDAT");
9573 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9574 which reference data within the $TEXT$ space (for example constant
9575 strings in the $LIT$ subspace).
9577 The assemblers (GAS and HP as) both have problems with handling
9578 the difference of two symbols which is the other correct way to
9579 reference constant data during PIC code generation.
9581 So, there's no way to reference constant data which is in the
9582 $TEXT$ space during PIC generation. Instead place all constant
9583 data into the $PRIVATE$ subspace (this reduces sharing, but it
9584 works correctly). */
9585 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9587 /* We must not have a reference to an external symbol defined in a
9588 shared library in a readonly section, else the SOM linker will
9591 So, we force exception information into the data section. */
9592 exception_section = data_section;
9595 /* On hpux10, the linker will give an error if we have a reference
9596 in the read-only data section to a symbol defined in a shared
9597 library. Therefore, expressions that might require a reloc can
9598 not be placed in the read-only data section. */
9601 pa_select_section (tree exp, int reloc,
9602 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9604 if (TREE_CODE (exp) == VAR_DECL
9605 && TREE_READONLY (exp)
9606 && !TREE_THIS_VOLATILE (exp)
9607 && DECL_INITIAL (exp)
9608 && (DECL_INITIAL (exp) == error_mark_node
9609 || TREE_CONSTANT (DECL_INITIAL (exp)))
9613 && DECL_ONE_ONLY (exp)
9614 && !DECL_WEAK (exp))
9615 return som_one_only_readonly_data_section;
9617 return readonly_data_section;
9619 else if (CONSTANT_CLASS_P (exp) && !reloc)
9620 return readonly_data_section;
9622 && TREE_CODE (exp) == VAR_DECL
9623 && DECL_ONE_ONLY (exp)
9624 && !DECL_WEAK (exp))
9625 return som_one_only_data_section;
9627 return data_section;
/* Implement ASM_GLOBALIZE_LABEL: emit an .EXPORT directive for NAME
   on STREAM.  We only handle DATA objects here, functions are
   globalized in ASM_DECLARE_FUNCTION_NAME.  */

static void
pa_globalize_label (FILE *stream, const char *name)
{
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
9643 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9646 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9647 int incoming ATTRIBUTE_UNUSED)
9649 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9652 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9655 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9657 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9658 PA64 ABI says that objects larger than 128 bits are returned in memory.
9659 Note, int_size_in_bytes can return -1 if the size of the object is
9660 variable or larger than the maximum value that can be expressed as
9661 a HOST_WIDE_INT. It can also return zero for an empty type. The
9662 simplest way to handle variable and empty types is to pass them in
9663 memory. This avoids problems in defining the boundaries of argument
9664 slots, allocating registers, etc. */
9665 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9666 || int_size_in_bytes (type) <= 0);
9669 /* Structure to hold declaration and name of external symbols that are
9670 emitted by GCC. We generate a vector of these symbols and output them
9671 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9672 This avoids putting out names that are never really used. */
9674 typedef struct GTY(()) extern_symbol
9680 /* Define gc'd vector type for extern_symbol. */
9681 DEF_VEC_O(extern_symbol);
9682 DEF_VEC_ALLOC_O(extern_symbol,gc);
9684 /* Vector of extern_symbol pointers. */
9685 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9687 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9688 /* Mark DECL (name NAME) as an external reference (assembler output
9689 file FILE). This saves the names to output at the end of the file
9690 if actually referenced. */
9693 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9695 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9697 gcc_assert (file == asm_out_file);
9702 /* Output text required at the end of an assembler file.
9703 This includes deferred plabels and .import directives for
9704 all external symbols that were actually referenced. */
9707 pa_hpux_file_end (void)
9712 if (!NO_DEFERRED_PROFILE_COUNTERS)
9713 output_deferred_profile_counters ();
9715 output_deferred_plabels ();
9717 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9719 tree decl = p->decl;
9721 if (!TREE_ASM_WRITTEN (decl)
9722 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9723 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9726 VEC_free (extern_symbol, gc, extern_symbols);
9730 /* Return true if a change from mode FROM to mode TO for a register
9731 in register class RCLASS is invalid. */
9734 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9735 enum reg_class rclass)
9740 /* Reject changes to/from complex and vector modes. */
9741 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9742 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9745 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9748 /* There is no way to load QImode or HImode values directly from
9749 memory. SImode loads to the FP registers are not zero extended.
9750 On the 64-bit target, this conflicts with the definition of
9751 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9752 with different sizes in the floating-point registers. */
9753 if (MAYBE_FP_REG_CLASS_P (rclass))
9756 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9757 in specific sets of registers. Thus, we cannot allow changing
9758 to a larger mode when it's larger than a word. */
9759 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9760 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9766 /* Returns TRUE if it is a good idea to tie two pseudo registers
9767 when one has mode MODE1 and one has mode MODE2.
9768 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9769 for any hard reg, then this must be FALSE for correct output.
9771 We should return FALSE for QImode and HImode because these modes
9772 are not ok in the floating-point registers. However, this prevents
9773 tieing these modes to SImode and DImode in the general registers.
9774 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9775 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9776 in the floating-point registers. */
9779 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9781 /* Don't tie modes in different classes. */
9782 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))