1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69 set = single_set (out_insn);
73 other_mode = GET_MODE (SET_SRC (set));
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
83 #define DO_FRAME_NOTES 0
87 static void copy_reg_pointer (rtx, rtx);
88 static void fix_range (const char *);
89 static bool pa_handle_option (size_t, const char *, int);
90 static int hppa_address_cost (rtx);
91 static bool hppa_rtx_costs (rtx, int, int, int *);
92 static inline rtx force_mode (enum machine_mode, rtx);
93 static void pa_reorg (void);
94 static void pa_combine_instructions (void);
95 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
96 static int forward_branch_p (rtx);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
107 static void update_total_code_bytes (int);
108 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
109 static int pa_adjust_cost (rtx, rtx, rtx, int);
110 static int pa_adjust_priority (rtx, int);
111 static int pa_issue_rate (void);
112 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
113 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
115 static void pa_encode_section_info (tree, rtx, int);
116 static const char *pa_strip_name_encoding (const char *);
117 static bool pa_function_ok_for_sibcall (tree, tree);
118 static void pa_globalize_label (FILE *, const char *)
120 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
121 HOST_WIDE_INT, tree);
122 #if !defined(USE_COLLECT2)
123 static void pa_asm_out_constructor (rtx, int);
124 static void pa_asm_out_destructor (rtx, int);
126 static void pa_init_builtins (void);
127 static rtx hppa_builtin_saveregs (void);
128 static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
129 static bool pa_scalar_mode_supported_p (enum machine_mode);
130 static bool pa_commutative_p (rtx x, int outer_code);
131 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
132 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
134 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
135 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
136 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
137 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
139 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
140 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
142 static void output_deferred_plabels (void);
143 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
144 #ifdef ASM_OUTPUT_EXTERNAL_REAL
145 static void pa_hpux_file_end (void);
147 #ifdef HPUX_LONG_DOUBLE_LIBRARY
148 static void pa_hpux_init_libfuncs (void);
150 static rtx pa_struct_value_rtx (tree, int);
151 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
153 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
155 static struct machine_function * pa_init_machine_status (void);
156 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
158 secondary_reload_info *);
159 static void pa_extra_live_on_entry (bitmap);
161 /* The following extra sections are only used for SOM. */
162 static GTY(()) section *som_readonly_data_section;
163 static GTY(()) section *som_one_only_readonly_data_section;
164 static GTY(()) section *som_one_only_data_section;
166 /* Save the operands last given to a compare for use when we
167 generate a scc or bcc insn. */
168 rtx hppa_compare_op0, hppa_compare_op1;
169 enum cmp_type hppa_branch_type;
171 /* Which cpu we are scheduling for. */
172 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
174 /* The UNIX standard to use for predefines and linking. */
175 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
177 /* Counts for the number of callee-saved general and floating point
178 registers which were saved by the current function's prologue. */
179 static int gr_saved, fr_saved;
181 /* Boolean indicating whether the return pointer was saved by the
182 current function's prologue. */
183 static bool rp_saved;
185 static rtx find_addr_reg (rtx);
187 /* Keep track of the number of bytes we have output in the CODE subspace
188 during this compilation so we'll know when to emit inline long-calls. */
189 unsigned long total_code_bytes;
191 /* The last address of the previous function plus the number of bytes in
192 associated thunks that have been output. This is used to determine if
193 a thunk can use an IA-relative branch to reach its target function. */
194 static int last_address;
196 /* Variables to handle plabels that we discover are necessary at assembly
197 output time. They are output after the current function. */
198 struct deferred_plabel GTY(())
203 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
205 static size_t n_deferred_plabels = 0;
208 /* Initialize the GCC target structure. */
210 #undef TARGET_ASM_ALIGNED_HI_OP
211 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
212 #undef TARGET_ASM_ALIGNED_SI_OP
213 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
214 #undef TARGET_ASM_ALIGNED_DI_OP
215 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
216 #undef TARGET_ASM_UNALIGNED_HI_OP
217 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
218 #undef TARGET_ASM_UNALIGNED_SI_OP
219 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
220 #undef TARGET_ASM_UNALIGNED_DI_OP
221 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
222 #undef TARGET_ASM_INTEGER
223 #define TARGET_ASM_INTEGER pa_assemble_integer
225 #undef TARGET_ASM_FUNCTION_PROLOGUE
226 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
227 #undef TARGET_ASM_FUNCTION_EPILOGUE
228 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
230 #undef TARGET_SCHED_ADJUST_COST
231 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
232 #undef TARGET_SCHED_ADJUST_PRIORITY
233 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
234 #undef TARGET_SCHED_ISSUE_RATE
235 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
237 #undef TARGET_ENCODE_SECTION_INFO
238 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
239 #undef TARGET_STRIP_NAME_ENCODING
240 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
242 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
243 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
245 #undef TARGET_COMMUTATIVE_P
246 #define TARGET_COMMUTATIVE_P pa_commutative_p
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
250 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
251 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
253 #undef TARGET_ASM_FILE_END
254 #ifdef ASM_OUTPUT_EXTERNAL_REAL
255 #define TARGET_ASM_FILE_END pa_hpux_file_end
257 #define TARGET_ASM_FILE_END output_deferred_plabels
260 #if !defined(USE_COLLECT2)
261 #undef TARGET_ASM_CONSTRUCTOR
262 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
263 #undef TARGET_ASM_DESTRUCTOR
264 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
267 #undef TARGET_DEFAULT_TARGET_FLAGS
268 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
269 #undef TARGET_HANDLE_OPTION
270 #define TARGET_HANDLE_OPTION pa_handle_option
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS pa_init_builtins
275 #undef TARGET_RTX_COSTS
276 #define TARGET_RTX_COSTS hppa_rtx_costs
277 #undef TARGET_ADDRESS_COST
278 #define TARGET_ADDRESS_COST hppa_address_cost
280 #undef TARGET_MACHINE_DEPENDENT_REORG
281 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
283 #ifdef HPUX_LONG_DOUBLE_LIBRARY
284 #undef TARGET_INIT_LIBFUNCS
285 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
288 #undef TARGET_PROMOTE_FUNCTION_RETURN
289 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
290 #undef TARGET_PROMOTE_PROTOTYPES
291 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
295 #undef TARGET_RETURN_IN_MEMORY
296 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
297 #undef TARGET_MUST_PASS_IN_STACK
298 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
299 #undef TARGET_PASS_BY_REFERENCE
300 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
301 #undef TARGET_CALLEE_COPIES
302 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
303 #undef TARGET_ARG_PARTIAL_BYTES
304 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
306 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
307 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
308 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
309 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
311 #undef TARGET_SCALAR_MODE_SUPPORTED_P
312 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
314 #undef TARGET_CANNOT_FORCE_CONST_MEM
315 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
317 #undef TARGET_SECONDARY_RELOAD
318 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
320 #undef TARGET_EXTRA_LIVE_ON_ENTRY
321 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
323 struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

/* NOTE(review): this listing elides several lines of this function
   (its `static void' header, braces, the declarations of i/first/last,
   and the early-return control flow around the warning calls).  The
   visible statements below are kept verbatim; confirm the elided
   parts against the complete source.  */
fix_range (const char *const_str)
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers. The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler. This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */

  /* Make a writable stack copy of the option string so we can split
     it in place at '-' and ','.  */
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  /* Each range must contain a dash separating its two endpoints.  */
  dash = strchr (str, '-');
  warning (0, "value of -mfixed-range must have form REG1-REG2");

  /* A comma separates this range from any following one.  */
  comma = strchr (dash + 1, ',');

  /* Translate both endpoint names to register numbers, warning on
     anything decode_reg_name does not recognize.  */
  first = decode_reg_name (str);
  warning (0, "unknown register name: %s", str);
  last = decode_reg_name (dash + 1);
  warning (0, "unknown register name: %s", dash + 1);
  warning (0, "%s-%s is an empty range", str, dash + 1);

  /* Mark every register in [first, last] fixed and call-used so the
     allocator never touches it.  */
  for (i = first; i <= last; ++i)
    fixed_regs[i] = call_used_regs[i] = 1;

  /* Check if all floating point registers have been fixed. */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)

  /* If no FP register is left usable, disable FP registers entirely.  */
  target_flags |= MASK_DISABLE_FPREGS;
/* Implement TARGET_HANDLE_OPTION. */

/* NOTE(review): the listing elides this function's `static bool'
   header, its switch statement skeleton, the `return true/false'
   statements after each case, and the body of the OPT_mfixed_range_
   and munix cases.  Visible statements are kept verbatim.  */
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
    /* -mpa-risc-1-0: clear both newer-architecture flag bits.  */
    case OPT_mpa_risc_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);

    /* -mpa-risc-1-1: PA 1.1 but not PA 2.0.  */
    case OPT_mpa_risc_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;

    /* -mpa-risc-2-0: PA 2.0 implies PA 1.1.  */
    case OPT_mpa_risc_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;

      /* -mschedule=<cpu>: select the processor model to schedule for.
         Unrecognized names presumably fall through to an error path
         elided from this listing.  */
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;

    /* -mfixed-range=: body elided in this listing (presumably calls
       fix_range (arg) -- confirm).  */
    case OPT_mfixed_range_:

/* HP-UX UNIX-standard selection; case bodies elided in this listing.  */
#if TARGET_HPUX_10_10
#if TARGET_HPUX_11_11
/* Validate and adjust target flags after all options are parsed.
   NOTE(review): the listing elides this function's return-type line,
   braces, and some statements (e.g. the flag_pic clearing after the
   portable-runtime warning, the assignment after `flag_pic == 1', and
   the `if (!TARGET_GAS)' guard before the unaligned-op clearing).
   Visible statements are kept verbatim.  */
override_options (void)
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information. There is no benefit in using this optimization
     on PA8000 and later processors. */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  /* PIC is incompatible with the portable runtime model.  */
  if (flag_pic && TARGET_PORTABLE_RUNTIME)
      warning (0, "PIC code generation is not supported in the portable runtime model");

  /* PIC is incompatible with fast indirect calls.  */
  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
      warning (0, "PIC code generation is not compatible with fast indirect calls");

  /* Debug info requires the GNU assembler on this target.  */
  if (! TARGET_GAS && write_symbols != NO_DEBUG)
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;

  /* We only support the "big PIC" model now. And we always generate PIC
     code when in 64bit mode. */
  if (flag_pic == 1 || TARGET_64BIT)

  /* We can't guarantee that .dword is available for 32-bit targets. */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS. */
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;

  /* Install the per-function machine_function allocator.  */
  init_machine_status = pa_init_machine_status;
/* Implement TARGET_INIT_BUILTINS: adjust builtin declarations for
   this target.  NOTE(review): the `static void' header and braces are
   elided in this listing; visible statements kept verbatim.  */
pa_init_builtins (void)
/* Without fputc_unlocked, fall back to putc_unlocked for both the
   explicit and the implicit builtin declaration.  */
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];

  /* Map the finite builtins onto the target's _Isfinite/_Isfinitef
     library entry points.  */
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
532 /* Function to init struct machine_function.
533 This will be called, via a pointer variable,
534 from push_function_context. */
536 static struct machine_function *
537 pa_init_machine_status (void)
539 return ggc_alloc_cleared (sizeof (machine_function));
542 /* If FROM is a probable pointer register, mark TO as a probable
543 pointer register with the same pointer alignment as FROM. */
546 copy_reg_pointer (rtx to, rtx from)
548 if (REG_POINTER (from))
549 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
552 /* Return 1 if X contains a symbolic expression. We know these
553 expressions will have one of a few well defined forms, so
554 we need only check those forms. */
556 symbolic_expression_p (rtx x)
559 /* Strip off any HIGH. */
560 if (GET_CODE (x) == HIGH)
563 return (symbolic_operand (x, VOIDmode));
566 /* Accept any constant that can be moved in one instruction into a
569 cint_ok_for_move (HOST_WIDE_INT ival)
571 /* OK if ldo, ldil, or zdepi, can be used. */
572 return (VAL_14_BITS_P (ival)
573 || ldil_cint_p (ival)
574 || zdepi_cint_p (ival));
577 /* Return truth value of whether OP can be used as an operand in a
580 adddi3_operand (rtx op, enum machine_mode mode)
582 return (register_operand (op, mode)
583 || (GET_CODE (op) == CONST_INT
584 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
587 /* True iff the operand OP can be used as the destination operand of
588 an integer store. This also implies the operand could be used as
589 the source operand of an integer load. Symbolic, lo_sum and indexed
590 memory operands are not allowed. We accept reloading pseudos and
591 other memory operands. */
593 integer_store_memory_operand (rtx op, enum machine_mode mode)
595 return ((reload_in_progress
597 && REGNO (op) >= FIRST_PSEUDO_REGISTER
598 && reg_renumber [REGNO (op)] < 0)
599 || (GET_CODE (op) == MEM
600 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
601 && !symbolic_memory_operand (op, VOIDmode)
602 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
603 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
606 /* True iff ldil can be used to load this CONST_INT. The least
607 significant 11 bits of the value must be zero and the value must
608 not change sign when extended from 32 to 64 bits. */
610 ldil_cint_p (HOST_WIDE_INT ival)
612 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
614 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
617 /* True iff zdepi can be used to generate this CONST_INT.
618 zdepi first sign extends a 5-bit signed number to a given field
619 length, then places this field anywhere in a zero. */
621 zdepi_cint_p (unsigned HOST_WIDE_INT x)
623 unsigned HOST_WIDE_INT lsb_mask, t;
625 /* This might not be obvious, but it's at least fast.
626 This function is critical; we don't have the time loops would take. */
628 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
629 /* Return true iff t is a power of two. */
630 return ((t & (t - 1)) == 0);
633 /* True iff depi or extru can be used to compute (reg & mask).
634 Accept bit pattern like these:
639 and_mask_p (unsigned HOST_WIDE_INT mask)
642 mask += mask & -mask;
643 return (mask & (mask - 1)) == 0;
646 /* True iff depi can be used to compute (reg | MASK). */
648 ior_mask_p (unsigned HOST_WIDE_INT mask)
650 mask += mask & -mask;
651 return (mask & (mask - 1)) == 0;
/* Legitimize PIC addresses. If the address is already
   position-independent, we return ORIG. Newly generated
   position-independent addresses go to REG. If we need more
   than one register, we lose. */

/* NOTE(review): this listing elides the return-type line, braces,
   local declarations (pic_ref, tmp_reg, insn, base), the `return'
   statements at the end of each branch, and parts of the UNSPEC
   construction.  Visible statements kept verbatim.  */
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
  /* TLS symbols must be handled by legitimize_tls_address instead.  */
  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling. */
  if (pic_label_operand (orig, mode))
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 [multiple modes -- continuation elided in this listing]

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely. */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;

  if (GET_CODE (orig) == SYMBOL_REF)
      /* Before reload, allocate a temporary register for the intermediate
	 result. This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead. */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      /* tmp_reg = %r19 + high part of the symbol's PIC offset.  */
      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));

      /* Load the address from the DLT entry (assignment target, a
	 pic_ref declaration line, is elided in this listing).  */
	= gen_const_mem (Pmode,
			 gen_rtx_LO_SUM (Pmode, tmp_reg,
					 gen_rtx_UNSPEC (Pmode,

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
      set_unique_reg_note (insn, REG_EQUAL, orig);

  else if (GET_CODE (orig) == CONST)
      /* Already legitimized if it is %r19 plus something.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Legitimize base and offset separately, reusing REG for the
	 offset only when the base did not consume it.  */
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	  /* A small offset can be folded directly into the address.  */
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here? */
742 static GTY(()) rtx gen_tls_tga;
745 gen_tls_get_addr (void)
748 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
753 hppa_tls_call (rtx arg)
757 ret = gen_reg_rtx (Pmode);
758 emit_library_call_value (gen_tls_get_addr (), ret,
759 LCT_CONST, Pmode, 1, arg, Pmode);
/* Legitimize a TLS symbol reference ADDR according to its access
   model, returning a register holding the address.
   NOTE(review): this listing elides the return-type line, braces, the
   switch statement on `model', the `break's, the default case and the
   final return.  Between each *_pic / non-pic emit pair an
   `if (flag_pic) ... else' selector is presumably elided -- confirm
   against the complete source.  Visible statements kept verbatim.  */
legitimize_tls_address (rtx addr)
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

    /* General dynamic: load the module/offset descriptor and call
       __tls_get_addr.  */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_tgd_load_pic (tmp, addr));
      emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);

    /* Local dynamic: one __tls_get_addr call for the module base,
       then add the symbol's offset within the module.  */
    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_tld_load_pic (tmp, addr));
      emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      t2 = gen_reg_rtx (Pmode);
      /* Wrap the call in a libcall block with an UNSPEC equivalence so
	 CSE can share the module base across references.  */
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      emit_insn (gen_tld_offset_load (ret, addr, t2));

    /* Initial exec: load the offset from the GOT and add the thread
       pointer.  */
    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tie_load_pic (tmp, addr));
      emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));

    /* Local exec: the offset is a link-time constant from the thread
       pointer.  */
    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
825 /* Try machine-dependent ways of modifying an illegitimate address
826 to be legitimate. If we find one, return the new, valid address.
827 This macro is used in only one place: `memory_address' in explow.c.
829 OLDX is the address as it was before break_out_memory_refs was called.
830 In some cases it is useful to look at this to decide what needs to be done.
832 MODE and WIN are passed so that this macro can use
833 GO_IF_LEGITIMATE_ADDRESS.
835 It is always safe for this macro to do nothing. It exists to recognize
836 opportunities to optimize the output.
838 For the PA, transform:
840 memory(X + <large int>)
844 if (<large int> & mask) >= 16
845 Y = (<large int> & ~mask) + mask + 1 Round up.
847 Y = (<large int> & ~mask) Round down.
849 memory (Z + (<large int> - Y));
851 This is for CSE to find several similar references, and only use one Z.
853 X can either be a SYMBOL_REF or REG, but because combine cannot
854 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
855 D will not fit in 14 bits.
857 MODE_FLOAT references allow displacements which fit in 5 bits, so use
860 MODE_INT references allow displacements which fit in 14 bits, so use
863 This relies on the fact that most mode MODE_FLOAT references will use FP
864 registers and most mode MODE_INT references will use integer registers.
865 (In the rare case of an FP register used in an integer MODE, we depend
866 on secondary reloads to clean things up.)
869 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
870 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
871 addressing modes to be used).
873 Put X and Z into registers. Then put the entire expression into
877 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
878 enum machine_mode mode)
882 /* We need to canonicalize the order of operands in unscaled indexed
883 addresses since the code that checks if an address is valid doesn't
884 always try both orders. */
885 if (!TARGET_NO_SPACE_REGS
886 && GET_CODE (x) == PLUS
887 && GET_MODE (x) == Pmode
888 && REG_P (XEXP (x, 0))
889 && REG_P (XEXP (x, 1))
890 && REG_POINTER (XEXP (x, 0))
891 && !REG_POINTER (XEXP (x, 1)))
892 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
894 if (PA_SYMBOL_REF_TLS_P (x))
895 return legitimize_tls_address (x);
897 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
899 /* Strip off CONST. */
900 if (GET_CODE (x) == CONST)
903 /* Special case. Get the SYMBOL_REF into a register and use indexing.
904 That should always be safe. */
905 if (GET_CODE (x) == PLUS
906 && GET_CODE (XEXP (x, 0)) == REG
907 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
909 rtx reg = force_reg (Pmode, XEXP (x, 1));
910 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
913 /* Note we must reject symbols which represent function addresses
914 since the assembler/linker can't handle arithmetic on plabels. */
915 if (GET_CODE (x) == PLUS
916 && GET_CODE (XEXP (x, 1)) == CONST_INT
917 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
918 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
919 || GET_CODE (XEXP (x, 0)) == REG))
921 rtx int_part, ptr_reg;
923 int offset = INTVAL (XEXP (x, 1));
926 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
927 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
929 /* Choose which way to round the offset. Round up if we
930 are >= halfway to the next boundary. */
931 if ((offset & mask) >= ((mask + 1) / 2))
932 newoffset = (offset & ~ mask) + mask + 1;
934 newoffset = (offset & ~ mask);
936 /* If the newoffset will not fit in 14 bits (ldo), then
937 handling this would take 4 or 5 instructions (2 to load
938 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
939 add the new offset and the SYMBOL_REF.) Combine can
940 not handle 4->2 or 5->2 combinations, so do not create
942 if (! VAL_14_BITS_P (newoffset)
943 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
945 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
948 gen_rtx_HIGH (Pmode, const_part));
951 gen_rtx_LO_SUM (Pmode,
952 tmp_reg, const_part));
956 if (! VAL_14_BITS_P (newoffset))
957 int_part = force_reg (Pmode, GEN_INT (newoffset));
959 int_part = GEN_INT (newoffset);
961 ptr_reg = force_reg (Pmode,
963 force_reg (Pmode, XEXP (x, 0)),
966 return plus_constant (ptr_reg, offset - newoffset);
969 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
971 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
972 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
973 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
974 && (OBJECT_P (XEXP (x, 1))
975 || GET_CODE (XEXP (x, 1)) == SUBREG)
976 && GET_CODE (XEXP (x, 1)) != CONST)
978 int val = INTVAL (XEXP (XEXP (x, 0), 1));
982 if (GET_CODE (reg1) != REG)
983 reg1 = force_reg (Pmode, force_operand (reg1, 0));
985 reg2 = XEXP (XEXP (x, 0), 0);
986 if (GET_CODE (reg2) != REG)
987 reg2 = force_reg (Pmode, force_operand (reg2, 0));
989 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
996 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
998 Only do so for floating point modes since this is more speculative
999 and we lose if it's an integer store. */
1000 if (GET_CODE (x) == PLUS
1001 && GET_CODE (XEXP (x, 0)) == PLUS
1002 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1003 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1004 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1005 && (mode == SFmode || mode == DFmode))
1008 /* First, try and figure out what to use as a base register. */
1009 rtx reg1, reg2, base, idx, orig_base;
1011 reg1 = XEXP (XEXP (x, 0), 1);
1016 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1017 then emit_move_sequence will turn on REG_POINTER so we'll know
1018 it's a base register below. */
1019 if (GET_CODE (reg1) != REG)
1020 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1022 if (GET_CODE (reg2) != REG)
1023 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1025 /* Figure out what the base and index are. */
1027 if (GET_CODE (reg1) == REG
1028 && REG_POINTER (reg1))
1031 orig_base = XEXP (XEXP (x, 0), 1);
1032 idx = gen_rtx_PLUS (Pmode,
1033 gen_rtx_MULT (Pmode,
1034 XEXP (XEXP (XEXP (x, 0), 0), 0),
1035 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1038 else if (GET_CODE (reg2) == REG
1039 && REG_POINTER (reg2))
1042 orig_base = XEXP (x, 1);
1049 /* If the index adds a large constant, try to scale the
1050 constant so that it can be loaded with only one insn. */
1051 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1052 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1053 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1054 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1056 /* Divide the CONST_INT by the scale factor, then add it to A. */
1057 int val = INTVAL (XEXP (idx, 1));
1059 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1060 reg1 = XEXP (XEXP (idx, 0), 0);
1061 if (GET_CODE (reg1) != REG)
1062 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1064 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1066 /* We can now generate a simple scaled indexed address. */
1069 (Pmode, gen_rtx_PLUS (Pmode,
1070 gen_rtx_MULT (Pmode, reg1,
1071 XEXP (XEXP (idx, 0), 1)),
1075 /* If B + C is still a valid base register, then add them. */
1076 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1077 && INTVAL (XEXP (idx, 1)) <= 4096
1078 && INTVAL (XEXP (idx, 1)) >= -4096)
1080 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1083 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1085 reg2 = XEXP (XEXP (idx, 0), 0);
1086 if (GET_CODE (reg2) != CONST_INT)
1087 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1089 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1090 gen_rtx_MULT (Pmode,
1096 /* Get the index into a register, then add the base + index and
1097 return a register holding the result. */
1099 /* First get A into a register. */
1100 reg1 = XEXP (XEXP (idx, 0), 0);
1101 if (GET_CODE (reg1) != REG)
1102 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1104 /* And get B into a register. */
1105 reg2 = XEXP (idx, 1);
1106 if (GET_CODE (reg2) != REG)
1107 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1109 reg1 = force_reg (Pmode,
1110 gen_rtx_PLUS (Pmode,
1111 gen_rtx_MULT (Pmode, reg1,
1112 XEXP (XEXP (idx, 0), 1)),
1115 /* Add the result to our base register and return. */
1116 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1120 /* Uh-oh. We might have an address for x[n-100000]. This needs
1121 special handling to avoid creating an indexed memory address
1122 with x-100000 as the base.
1124 If the constant part is small enough, then it's still safe because
1125 there is a guard page at the beginning and end of the data segment.
1127 Scaled references are common enough that we want to try and rearrange the
1128 terms so that we can use indexing for these addresses too. Only
1129 do the optimization for floatint point modes. */
1131 if (GET_CODE (x) == PLUS
1132 && symbolic_expression_p (XEXP (x, 1)))
1134 /* Ugly. We modify things here so that the address offset specified
1135 by the index expression is computed first, then added to x to form
1136 the entire address. */
1138 rtx regx1, regx2, regy1, regy2, y;
1140 /* Strip off any CONST. */
1142 if (GET_CODE (y) == CONST)
1145 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1147 /* See if this looks like
1148 (plus (mult (reg) (shadd_const))
1149 (const (plus (symbol_ref) (const_int))))
1151 Where const_int is small. In that case the const
1152 expression is a valid pointer for indexing.
1154 If const_int is big, but can be divided evenly by shadd_const
1155 and added to (reg). This allows more scaled indexed addresses. */
1156 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1157 && GET_CODE (XEXP (x, 0)) == MULT
1158 && GET_CODE (XEXP (y, 1)) == CONST_INT
1159 && INTVAL (XEXP (y, 1)) >= -4096
1160 && INTVAL (XEXP (y, 1)) <= 4095
1161 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1162 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1164 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1168 if (GET_CODE (reg1) != REG)
1169 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1171 reg2 = XEXP (XEXP (x, 0), 0);
1172 if (GET_CODE (reg2) != REG)
1173 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1175 return force_reg (Pmode,
1176 gen_rtx_PLUS (Pmode,
1177 gen_rtx_MULT (Pmode,
1182 else if ((mode == DFmode || mode == SFmode)
1183 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1184 && GET_CODE (XEXP (x, 0)) == MULT
1185 && GET_CODE (XEXP (y, 1)) == CONST_INT
1186 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1187 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1188 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1191 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1192 / INTVAL (XEXP (XEXP (x, 0), 1))));
1193 regx2 = XEXP (XEXP (x, 0), 0);
1194 if (GET_CODE (regx2) != REG)
1195 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1196 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1200 gen_rtx_PLUS (Pmode,
1201 gen_rtx_MULT (Pmode, regx2,
1202 XEXP (XEXP (x, 0), 1)),
1203 force_reg (Pmode, XEXP (y, 0))));
1205 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1206 && INTVAL (XEXP (y, 1)) >= -4096
1207 && INTVAL (XEXP (y, 1)) <= 4095)
1209 /* This is safe because of the guard page at the
1210 beginning and end of the data space. Just
1211 return the original address. */
1216 /* Doesn't look like one we can optimize. */
1217 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1218 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1219 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1220 regx1 = force_reg (Pmode,
1221 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1223 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1231 /* For the HPPA, REG and REG+CONST is cost 0
1232 and addresses involving symbolic constants are cost 2.
1234 PIC addresses are very expensive.
1236 It is no coincidence that this has the same structure
1237 as GO_IF_LEGITIMATE_ADDRESS. */
/* Return the cost of address rtx X; the cost is selected purely by the
   RTX code of the address.  NOTE(review): the individual switch cases are
   not visible in this excerpt -- confirm the per-code costs against the
   full file.  */
1240 hppa_address_cost (rtx X)
/* Dispatch on the outermost code of the address expression.  */
1242 switch (GET_CODE (X))
1255 /* Compute a (partial) cost for rtx X. Return true if the complete
1256 cost has been computed, and false if subexpressions should be
1257 scanned. In either case, *TOTAL contains the cost result. */
/* Target hook body for rtx costs.  OUTER_CODE is the code of the rtx
   containing X; it is consulted so that FP zero constants used outside
   of a SET can be costed differently.  NOTE(review): the case labels and
   early returns are elided in this excerpt -- verify against the full
   file before relying on exact control flow.  */
1260 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
/* CONST_INT: zero and 14-bit immediates are the cheap forms.  */
1265 if (INTVAL (x) == 0)
1267 else if (INT_14_BITS (x))
/* FP +0.0/-0.0 constants are cheap except when being stored (SET).  */
1284 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1285 && outer_code != SET)
/* Multiplication cost: FP multiply is cheap (3 insns worth); integer
   multiply costs 8 when the FP unit can be used (PA 1.1, FP regs
   enabled) and 20 when it must be done in software.  */
1292 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1293 *total = COSTS_N_INSNS (3);
1294 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1295 *total = COSTS_N_INSNS (8);
1297 *total = COSTS_N_INSNS (20);
/* Division-like operation: FP form costs 14, otherwise 60 -- presumably
   the DIV/MOD cases; the labels are elided here.  */
1301 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1303 *total = COSTS_N_INSNS (14);
1311 *total = COSTS_N_INSNS (60);
1314 case PLUS: /* this includes shNadd insns */
1316 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1317 *total = COSTS_N_INSNS (3);
1319 *total = COSTS_N_INSNS (1);
/* Shifts and similar single-cycle operations.  */
1325 *total = COSTS_N_INSNS (1);
1333 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1334 new rtx with the correct mode. */
1336 force_mode (enum machine_mode mode, rtx orig)
/* Already the requested mode: return ORIG unchanged.  */
1338 if (mode == GET_MODE (orig))
/* Only hard registers may be re-created in a different mode; a pseudo
   here would indicate a caller bug.  */
1341 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1343 return gen_rtx_REG (mode, REGNO (orig));
1346 /* Return 1 if *X is a thread-local symbol. */
/* Callback for for_each_rtx (see pa_tls_referenced_p below); DATA is
   unused.  */
1349 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1351 return PA_SYMBOL_REF_TLS_P (*x);
1354 /* Return 1 if X contains a thread-local symbol. */
1357 pa_tls_referenced_p (rtx x)
/* Without TLS support no symbol can be thread-local; skip the walk.  */
1359 if (!TARGET_HAVE_TLS)
/* Walk every sub-rtx of X looking for a TLS SYMBOL_REF.  */
1362 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1365 /* Emit insns to move operands[1] into operands[0].
1367 Return 1 if we have written out everything that needs to be done to
1368 do the move. Otherwise, return 0 and the caller will emit the move
1371 Note SCRATCH_REG may not be in the proper mode depending on how it
1372 will be used. This routine is responsible for creating a new copy
1373 of SCRATCH_REG in the proper mode. */
/* SCRATCH_REG may also be NULL_RTX / 0 -- the recursive calls below pass
   0 explicitly -- so every secondary-reload path first tests it.  */
1376 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1378 register rtx operand0 = operands[0];
1379 register rtx operand1 = operands[1];
1382 /* We can only handle indexed addresses in the destination operand
1383 of floating point stores. Thus, we need to break out indexed
1384 addresses from the destination operand. */
1385 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1387 gcc_assert (can_create_pseudo_p ());
1389 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1390 operand0 = replace_equiv_address (operand0, tem);
1393 /* On targets with non-equivalent space registers, break out unscaled
1394 indexed addresses from the source operand before the final CSE.
1395 We have to do this because the REG_POINTER flag is not correctly
1396 carried through various optimization passes and CSE may substitute
1397 a pseudo without the pointer set for one with the pointer set. As
1398 a result, we loose various opportunities to create insns with
1399 unscaled indexed addresses. */
1400 if (!TARGET_NO_SPACE_REGS
1401 && !cse_not_expected
1402 && GET_CODE (operand1) == MEM
1403 && GET_CODE (XEXP (operand1, 0)) == PLUS
1404 && REG_P (XEXP (XEXP (operand1, 0), 0))
1405 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1407 = replace_equiv_address (operand1,
1408 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
/* During reload, a pseudo that did not get a hard register stands for
   its stack slot: substitute the equivalent memory recorded in
   reg_equiv_mem, for both the plain-REG and SUBREG-of-REG shapes of
   each operand.  */
1411 && reload_in_progress && GET_CODE (operand0) == REG
1412 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1413 operand0 = reg_equiv_mem[REGNO (operand0)];
1414 else if (scratch_reg
1415 && reload_in_progress && GET_CODE (operand0) == SUBREG
1416 && GET_CODE (SUBREG_REG (operand0)) == REG
1417 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1419 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1420 the code which tracks sets/uses for delete_output_reload. */
1421 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1422 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1423 SUBREG_BYTE (operand0));
1424 operand0 = alter_subreg (&temp);
1428 && reload_in_progress && GET_CODE (operand1) == REG
1429 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1430 operand1 = reg_equiv_mem[REGNO (operand1)];
1431 else if (scratch_reg
1432 && reload_in_progress && GET_CODE (operand1) == SUBREG
1433 && GET_CODE (SUBREG_REG (operand1)) == REG
1434 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1436 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1437 the code which tracks sets/uses for delete_output_reload. */
1438 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1439 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1440 SUBREG_BYTE (operand1));
1441 operand1 = alter_subreg (&temp);
/* Substitute any reload replacements recorded for a MEM address.  */
1444 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1445 && ((tem = find_replacement (&XEXP (operand0, 0)))
1446 != XEXP (operand0, 0)))
1447 operand0 = replace_equiv_address (operand0, tem)
1449 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1450 && ((tem = find_replacement (&XEXP (operand1, 0)))
1451 != XEXP (operand1, 0)))
1452 operand1 = replace_equiv_address (operand1, tem);
1454 /* Handle secondary reloads for loads/stores of FP registers from
1455 REG+D addresses where D does not fit in 5 or 14 bits, including
1456 (subreg (mem (addr))) cases. */
1458 && fp_reg_operand (operand0, mode)
1459 && ((GET_CODE (operand1) == MEM
1460 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1461 XEXP (operand1, 0)))
1462 || ((GET_CODE (operand1) == SUBREG
1463 && GET_CODE (XEXP (operand1, 0)) == MEM
1464 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1466 XEXP (XEXP (operand1, 0), 0))))))
1468 if (GET_CODE (operand1) == SUBREG)
1469 operand1 = XEXP (operand1, 0);
1471 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1472 it in WORD_MODE regardless of what mode it was originally given
1474 scratch_reg = force_mode (word_mode, scratch_reg);
1476 /* D might not fit in 14 bits either; for such cases load D into
1478 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1480 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1481 emit_move_insn (scratch_reg,
1482 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1484 XEXP (XEXP (operand1, 0), 0),
1488 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1489 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1490 replace_equiv_address (operand1, scratch_reg)));
/* Mirror case: FP register stored to a REG+D address that is not
   directly addressable; compute the address into SCRATCH_REG first.  */
1493 else if (scratch_reg
1494 && fp_reg_operand (operand1, mode)
1495 && ((GET_CODE (operand0) == MEM
1496 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1498 XEXP (operand0, 0)))
1499 || ((GET_CODE (operand0) == SUBREG)
1500 && GET_CODE (XEXP (operand0, 0)) == MEM
1501 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1503 XEXP (XEXP (operand0, 0), 0)))))
1505 if (GET_CODE (operand0) == SUBREG)
1506 operand0 = XEXP (operand0, 0);
1508 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1509 it in WORD_MODE regardless of what mode it was originally given
1511 scratch_reg = force_mode (word_mode, scratch_reg);
1513 /* D might not fit in 14 bits either; for such cases load D into
1515 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1517 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1518 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1521 XEXP (XEXP (operand0, 0),
1526 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1527 emit_insn (gen_rtx_SET (VOIDmode,
1528 replace_equiv_address (operand0, scratch_reg),
1532 /* Handle secondary reloads for loads of FP registers from constant
1533 expressions by forcing the constant into memory.
1535 Use scratch_reg to hold the address of the memory location.
1537 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1538 NO_REGS when presented with a const_int and a register class
1539 containing only FP registers. Doing so unfortunately creates
1540 more problems than it solves. Fix this for 2.5. */
1541 else if (scratch_reg
1542 && CONSTANT_P (operand1)
1543 && fp_reg_operand (operand0, mode))
1545 rtx const_mem, xoperands[2];
1547 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1548 it in WORD_MODE regardless of what mode it was originally given
1550 scratch_reg = force_mode (word_mode, scratch_reg);
1552 /* Force the constant into memory and put the address of the
1553 memory location into scratch_reg. */
1554 const_mem = force_const_mem (mode, operand1);
1555 xoperands[0] = scratch_reg;
1556 xoperands[1] = XEXP (const_mem, 0);
/* Recurse to load the address; note scratch_reg is deliberately 0.  */
1557 emit_move_sequence (xoperands, Pmode, 0);
1559 /* Now load the destination register. */
1560 emit_insn (gen_rtx_SET (mode, operand0,
1561 replace_equiv_address (const_mem, scratch_reg)));
1564 /* Handle secondary reloads for SAR. These occur when trying to load
1565 the SAR from memory, FP register, or with a constant. */
1566 else if (scratch_reg
1567 && GET_CODE (operand0) == REG
1568 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1569 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1570 && (GET_CODE (operand1) == MEM
1571 || GET_CODE (operand1) == CONST_INT
1572 || (GET_CODE (operand1) == REG
1573 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1575 /* D might not fit in 14 bits either; for such cases load D into
1577 if (GET_CODE (operand1) == MEM
1578 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1580 /* We are reloading the address into the scratch register, so we
1581 want to make sure the scratch register is a full register. */
1582 scratch_reg = force_mode (word_mode, scratch_reg);
1584 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1585 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1588 XEXP (XEXP (operand1, 0),
1592 /* Now we are going to load the scratch register from memory,
1593 we want to load it in the same width as the original MEM,
1594 which must be the same as the width of the ultimate destination,
1596 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1598 emit_move_insn (scratch_reg,
1599 replace_equiv_address (operand1, scratch_reg));
1603 /* We want to load the scratch register using the same mode as
1604 the ultimate destination. */
1605 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1607 emit_move_insn (scratch_reg, operand1);
1610 /* And emit the insn to set the ultimate destination. We know that
1611 the scratch register has the same mode as the destination at this
1613 emit_move_insn (operand0, scratch_reg);
1616 /* Handle the most common case: storing into a register. */
1617 else if (register_operand (operand0, mode))
1619 if (register_operand (operand1, mode)
1620 || (GET_CODE (operand1) == CONST_INT
1621 && cint_ok_for_move (INTVAL (operand1)))
1622 || (operand1 == CONST0_RTX (mode))
1623 || (GET_CODE (operand1) == HIGH
1624 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1625 /* Only `general_operands' can come here, so MEM is ok. */
1626 || GET_CODE (operand1) == MEM)
1628 /* Various sets are created during RTL generation which don't
1629 have the REG_POINTER flag correctly set. After the CSE pass,
1630 instruction recognition can fail if we don't consistently
1631 set this flag when performing register copies. This should
1632 also improve the opportunities for creating insns that use
1633 unscaled indexing. */
1634 if (REG_P (operand0) && REG_P (operand1))
1636 if (REG_POINTER (operand1)
1637 && !REG_POINTER (operand0)
1638 && !HARD_REGISTER_P (operand0))
1639 copy_reg_pointer (operand0, operand1);
1640 else if (REG_POINTER (operand0)
1641 && !REG_POINTER (operand1)
1642 && !HARD_REGISTER_P (operand1))
1643 copy_reg_pointer (operand1, operand0);
1646 /* When MEMs are broken out, the REG_POINTER flag doesn't
1647 get set. In some cases, we can set the REG_POINTER flag
1648 from the declaration for the MEM. */
1649 if (REG_P (operand0)
1650 && GET_CODE (operand1) == MEM
1651 && !REG_POINTER (operand0))
1653 tree decl = MEM_EXPR (operand1);
1655 /* Set the register pointer flag and register alignment
1656 if the declaration for this memory reference is a
1657 pointer type. Fortran indirect argument references
1660 && !(flag_argument_noalias > 1
1661 && TREE_CODE (decl) == INDIRECT_REF
1662 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1666 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1668 if (TREE_CODE (decl) == COMPONENT_REF)
1669 decl = TREE_OPERAND (decl, 1);
1671 type = TREE_TYPE (decl);
1672 if (TREE_CODE (type) == ARRAY_TYPE)
1673 type = get_inner_array_type (type);
1675 if (POINTER_TYPE_P (type))
1679 type = TREE_TYPE (type);
1680 /* Using TYPE_ALIGN_OK is rather conservative as
1681 only the ada frontend actually sets it. */
1682 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1684 mark_reg_pointer (operand0, align);
1689 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1693 else if (GET_CODE (operand0) == MEM)
/* Storing DF zero to memory: go through a temporary FP register so the
   store can be a single fstd (only before/outside reload, when a new
   pseudo may still be created).  */
1695 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1696 && !(reload_in_progress || reload_completed))
1698 rtx temp = gen_reg_rtx (DFmode);
1700 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1701 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1704 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1706 /* Run this case quickly. */
1707 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1710 if (! (reload_in_progress || reload_completed))
1712 operands[0] = validize_mem (operand0);
1713 operands[1] = operand1 = force_reg (mode, operand1);
1717 /* Simplify the source if we need to.
1718 Note we do have to handle function labels here, even though we do
1719 not consider them legitimate constants. Loop optimizations can
1720 call the emit_move_xxx with one as a source. */
1721 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1722 || function_label_operand (operand1, mode)
1723 || (GET_CODE (operand1) == HIGH
1724 && symbolic_operand (XEXP (operand1, 0), mode)))
1728 if (GET_CODE (operand1) == HIGH)
1731 operand1 = XEXP (operand1, 0);
1733 if (symbolic_operand (operand1, mode))
1735 /* Argh. The assembler and linker can't handle arithmetic
1738 So we force the plabel into memory, load operand0 from
1739 the memory location, then add in the constant part. */
1740 if ((GET_CODE (operand1) == CONST
1741 && GET_CODE (XEXP (operand1, 0)) == PLUS
1742 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1743 || function_label_operand (operand1, mode))
1745 rtx temp, const_part;
1747 /* Figure out what (if any) scratch register to use. */
1748 if (reload_in_progress || reload_completed)
1750 scratch_reg = scratch_reg ? scratch_reg : operand0;
1751 /* SCRATCH_REG will hold an address and maybe the actual
1752 data. We want it in WORD_MODE regardless of what mode it
1753 was originally given to us. */
1754 scratch_reg = force_mode (word_mode, scratch_reg);
1757 scratch_reg = gen_reg_rtx (Pmode);
1759 if (GET_CODE (operand1) == CONST)
1761 /* Save away the constant part of the expression. */
1762 const_part = XEXP (XEXP (operand1, 0), 1);
1763 gcc_assert (GET_CODE (const_part) == CONST_INT);
1765 /* Force the function label into memory. */
1766 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1770 /* No constant part. */
1771 const_part = NULL_RTX;
1773 /* Force the function label into memory. */
1774 temp = force_const_mem (mode, operand1);
1778 /* Get the address of the memory location. PIC-ify it if
1780 temp = XEXP (temp, 0);
1782 temp = legitimize_pic_address (temp, mode, scratch_reg);
1784 /* Put the address of the memory location into our destination
1787 emit_move_sequence (operands, mode, scratch_reg);
1789 /* Now load from the memory location into our destination
1791 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1792 emit_move_sequence (operands, mode, scratch_reg);
1794 /* And add back in the constant part. */
1795 if (const_part != NULL_RTX)
1796 expand_inc (operand0, const_part);
/* PIC symbolic source (this arm's condition is elided in this excerpt;
   presumably a flag_pic test -- confirm against the full file).  */
1805 if (reload_in_progress || reload_completed)
1807 temp = scratch_reg ? scratch_reg : operand0;
1808 /* TEMP will hold an address and maybe the actual
1809 data. We want it in WORD_MODE regardless of what mode it
1810 was originally given to us. */
1811 temp = force_mode (word_mode, temp);
1814 temp = gen_reg_rtx (Pmode);
1816 /* (const (plus (symbol) (const_int))) must be forced to
1817 memory during/after reload if the const_int will not fit
1819 if (GET_CODE (operand1) == CONST
1820 && GET_CODE (XEXP (operand1, 0)) == PLUS
1821 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1822 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1823 && (reload_completed || reload_in_progress)
1826 rtx const_mem = force_const_mem (mode, operand1);
1827 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1829 operands[1] = replace_equiv_address (const_mem, operands[1]);
1830 emit_move_sequence (operands, mode, temp);
1834 operands[1] = legitimize_pic_address (operand1, mode, temp);
1835 if (REG_P (operand0) && REG_P (operands[1]))
1836 copy_reg_pointer (operand0, operands[1]);
1837 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1840 /* On the HPPA, references to data space are supposed to use dp,
1841 register 27, but showing it in the RTL inhibits various cse
1842 and loop optimizations. */
1847 if (reload_in_progress || reload_completed)
1849 temp = scratch_reg ? scratch_reg : operand0;
1850 /* TEMP will hold an address and maybe the actual
1851 data. We want it in WORD_MODE regardless of what mode it
1852 was originally given to us. */
1853 temp = force_mode (word_mode, temp);
1856 temp = gen_reg_rtx (mode);
1858 /* Loading a SYMBOL_REF into a register makes that register
1859 safe to be used as the base in an indexed address.
1861 Don't mark hard registers though. That loses. */
1862 if (GET_CODE (operand0) == REG
1863 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1864 mark_reg_pointer (operand0, BITS_PER_UNIT);
1865 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1866 mark_reg_pointer (temp, BITS_PER_UNIT);
/* Build the HIGH/LO_SUM pair that materializes the symbol.  */
1869 set = gen_rtx_SET (mode, operand0, temp);
1871 set = gen_rtx_SET (VOIDmode,
1873 gen_rtx_LO_SUM (mode, temp, operand1));
1875 emit_insn (gen_rtx_SET (VOIDmode,
1877 gen_rtx_HIGH (mode, operand1)));
/* TLS symbol source: split off any constant addend, legitimize the
   bare SYMBOL_REF, then add the addend back.  */
1883 else if (pa_tls_referenced_p (operand1))
1888 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1890 addend = XEXP (XEXP (tmp, 0), 1);
1891 tmp = XEXP (XEXP (tmp, 0), 0);
1894 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1895 tmp = legitimize_tls_address (tmp);
1898 tmp = gen_rtx_PLUS (mode, tmp, addend);
1899 tmp = force_operand (tmp, operands[0]);
/* Integer constants that cannot be loaded with a single move insn.  */
1903 else if (GET_CODE (operand1) != CONST_INT
1904 || !cint_ok_for_move (INTVAL (operand1)))
1908 HOST_WIDE_INT value = 0;
1909 HOST_WIDE_INT insv = 0;
1912 if (GET_CODE (operand1) == CONST_INT)
1913 value = INTVAL (operand1);
1916 && GET_CODE (operand1) == CONST_INT
1917 && HOST_BITS_PER_WIDE_INT > 32
1918 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1922 /* Extract the low order 32 bits of the value and sign extend.
1923 If the new value is the same as the original value, we can
1924 can use the original value as-is. If the new value is
1925 different, we use it and insert the most-significant 32-bits
1926 of the original value into the final result. */
1927 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1928 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1931 #if HOST_BITS_PER_WIDE_INT > 32
1932 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1936 operand1 = GEN_INT (nval);
1940 if (reload_in_progress || reload_completed)
1941 temp = scratch_reg ? scratch_reg : operand0;
1943 temp = gen_reg_rtx (mode);
1945 /* We don't directly split DImode constants on 32-bit targets
1946 because PLUS uses an 11-bit immediate and the insn sequence
1947 generated is not as efficient as the one using HIGH/LO_SUM. */
1948 if (GET_CODE (operand1) == CONST_INT
1949 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1950 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1953 /* Directly break constant into high and low parts. This
1954 provides better optimization opportunities because various
1955 passes recognize constants split with PLUS but not LO_SUM.
1956 We use a 14-bit signed low part except when the addition
1957 of 0x4000 to the high part might change the sign of the
1959 HOST_WIDE_INT low = value & 0x3fff;
1960 HOST_WIDE_INT high = value & ~ 0x3fff;
1964 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1972 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1973 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1977 emit_insn (gen_rtx_SET (VOIDmode, temp,
1978 gen_rtx_HIGH (mode, operand1)));
1979 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1982 insn = emit_move_insn (operands[0], operands[1]);
1984 /* Now insert the most significant 32 bits of the value
1985 into the register. When we don't have a second register
1986 available, it could take up to nine instructions to load
1987 a 64-bit integer constant. Prior to reload, we force
1988 constants that would take more than three instructions
1989 to load to the constant pool. During and after reload,
1990 we have to handle all possible values. */
1993 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1994 register and the value to be inserted is outside the
1995 range that can be loaded with three depdi instructions. */
1996 if (temp != operand0 && (insv >= 16384 || insv < -16384))
1998 operand1 = GEN_INT (insv);
2000 emit_insn (gen_rtx_SET (VOIDmode, temp,
2001 gen_rtx_HIGH (mode, operand1)));
2002 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2003 emit_insn (gen_insv (operand0, GEN_INT (32),
2008 int len = 5, pos = 27;
2010 /* Insert the bits using the depdi instruction. */
2013 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2014 HOST_WIDE_INT sign = v5 < 0;
2016 /* Left extend the insertion. */
2017 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2018 while (pos > 0 && (insv & 1) == sign)
2020 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2025 emit_insn (gen_insv (operand0, GEN_INT (len),
2026 GEN_INT (pos), GEN_INT (v5)));
2028 len = pos > 0 && pos < 5 ? pos : 5;
/* Attach a REG_EQUAL note so later passes know the full constant.  */
2034 set_unique_reg_note (insn, REG_EQUAL, op1);
2039 /* Now have insn-emit do whatever it normally does. */
2043 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2044 it will need a link/runtime reloc). */
2047 reloc_needed (tree exp)
/* Recurse over the tree, OR-ing together the answers for sub-trees.  */
2051 switch (TREE_CODE (exp))
/* Binary arithmetic on addresses: a reloc in either operand counts.  */
2056 case POINTER_PLUS_EXPR:
2059 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2060 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
/* Conversions just wrap their single operand.  */
2065 case NON_LVALUE_EXPR:
2066 reloc = reloc_needed (TREE_OPERAND (exp, 0));
/* CONSTRUCTOR: scan every element value.  */
2072 unsigned HOST_WIDE_INT ix;
2074 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2076 reloc |= reloc_needed (value);
2089 /* Does operand (which is a symbolic_operand) live in text space?
2090 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2094 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
/* Strip a CONST wrapper to reach the underlying SYMBOL_REF.  */
2096 if (GET_CODE (operand) == CONST)
2097 operand = XEXP (XEXP (operand, 0), 0);
/* Two arms differing in how constant-pool entries are treated; the
   guarding condition between them is elided in this excerpt --
   presumably a flag_pic test.  TODO confirm against the full file.  */
2100 if (GET_CODE (operand) == SYMBOL_REF)
2101 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2105 if (GET_CODE (operand) == SYMBOL_REF)
2106 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2112 /* Return the best assembler insn template
2113 for moving operands[1] into operands[0] as a fullword. */
2115 singlemove_string (rtx *operands)
2117 HOST_WIDE_INT intval;
/* Store to memory.  */
2119 if (GET_CODE (operands[0]) == MEM)
2120 return "stw %r1,%0";
/* Load from memory (template elided in this excerpt).  */
2121 if (GET_CODE (operands[1]) == MEM)
2123 if (GET_CODE (operands[1]) == CONST_DOUBLE)
/* Only SFmode FP constants can be moved as a single word.  */
2128 gcc_assert (GET_MODE (operands[1]) == SFmode);
2130 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2132 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2133 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2135 operands[1] = GEN_INT (i);
2136 /* Fall through to CONST_INT case. */
2138 if (GET_CODE (operands[1]) == CONST_INT)
2140 intval = INTVAL (operands[1]);
/* Pick the cheapest immediate-load form: ldi/ldo for 14-bit values,
   ldil when the low 11 bits are zero, zdepi for depositable
   bit-strings, else the two-insn ldil/ldo pair.  */
2142 if (VAL_14_BITS_P (intval))
2144 else if ((intval & 0x7ff) == 0)
2145 return "ldil L'%1,%0";
2146 else if (zdepi_cint_p (intval))
2147 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2149 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
/* Default: register-to-register copy.  */
2151 return "copy %1,%0";
2155 /* Compute position (in OP[1]) and width (in OP[2])
2156 useful for copying IMM to a register using the zdepi
2157 instructions. Store the immediate value to insert in OP[0]. */
2159 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2163 /* Find the least significant set bit in IMM. */
2164 for (lsb = 0; lsb < 32; lsb++)
2171 /* Choose variants based on *sign* of the 5-bit field. */
/* Bit 4 clear: the deposited field is non-negative; a 4-bit width
   suffices unless the field would run off the top of the word.  */
2172 if ((imm & 0x10) == 0)
2173 len = (lsb <= 28) ? 4 : 32 - lsb;
2176 /* Find the width of the bitstring in IMM. */
2177 for (len = 5; len < 32; len++)
2179 if ((imm & (1 << len)) == 0)
2183 /* Sign extend IMM as a 5-bit value. */
2184 imm = (imm & 0xf) - 0x10;
2192 /* Compute position (in OP[1]) and width (in OP[2])
2193 useful for copying IMM to a register using the depdi,z
2194 instructions. Store the immediate value to insert in OP[0]. */
/* 64-bit analogue of compute_zdepwi_operands above: identical structure,
   but widths/positions range over HOST_BITS_PER_WIDE_INT instead of 32.  */
2196 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2198 HOST_WIDE_INT lsb, len;
2200 /* Find the least significant set bit in IMM. */
2201 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2208 /* Choose variants based on *sign* of the 5-bit field. */
/* Bit 4 clear: non-negative field, so a 4-bit width suffices unless the
   field would run off the top of the word.  */
2209 if ((imm & 0x10) == 0)
2210 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2211 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2214 /* Find the width of the bitstring in IMM. */
2215 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2217 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2221 /* Sign extend IMM as a 5-bit value. */
2222 imm = (imm & 0xf) - 0x10;
2230 /* Output assembler code to perform a doubleword move insn
2231 with operands OPERANDS. */
2234 output_move_double (rtx *operands)
2236 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2238 rtx addreg0 = 0, addreg1 = 0;
2240 /* First classify both operands. */
2242 if (REG_P (operands[0]))
2244 else if (offsettable_memref_p (operands[0]))
2246 else if (GET_CODE (operands[0]) == MEM)
2251 if (REG_P (operands[1]))
2253 else if (CONSTANT_P (operands[1]))
2255 else if (offsettable_memref_p (operands[1]))
2257 else if (GET_CODE (operands[1]) == MEM)
2262 /* Check for the cases that the operand constraints are not
2263 supposed to allow to happen. */
2264 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2266 /* Handle copies between general and floating registers. */
2268 if (optype0 == REGOP && optype1 == REGOP
2269 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2271 if (FP_REG_P (operands[0]))
2273 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2274 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2275 return "{fldds|fldd} -16(%%sp),%0";
2279 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2280 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2281 return "{ldws|ldw} -12(%%sp),%R0";
2285 /* Handle auto decrementing and incrementing loads and stores
2286 specifically, since the structure of the function doesn't work
2287 for them without major modification. Do it better when we learn
2288 this port about the general inc/dec addressing of PA.
2289 (This was written by tege. Chide him if it doesn't work.) */
2291 if (optype0 == MEMOP)
2293 /* We have to output the address syntax ourselves, since print_operand
2294 doesn't deal with the addresses we want to use. Fix this later. */
2296 rtx addr = XEXP (operands[0], 0);
2297 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2299 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2301 operands[0] = XEXP (addr, 0);
2302 gcc_assert (GET_CODE (operands[1]) == REG
2303 && GET_CODE (operands[0]) == REG);
2305 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2307 /* No overlap between high target register and address
2308 register. (We do this in a non-obvious way to
2309 save a register file writeback) */
2310 if (GET_CODE (addr) == POST_INC)
2311 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2312 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2314 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2316 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2318 operands[0] = XEXP (addr, 0);
2319 gcc_assert (GET_CODE (operands[1]) == REG
2320 && GET_CODE (operands[0]) == REG);
2322 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2323 /* No overlap between high target register and address
2324 register. (We do this in a non-obvious way to save a
2325 register file writeback) */
2326 if (GET_CODE (addr) == PRE_INC)
2327 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2328 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2331 if (optype1 == MEMOP)
2333 /* We have to output the address syntax ourselves, since print_operand
2334 doesn't deal with the addresses we want to use. Fix this later. */
2336 rtx addr = XEXP (operands[1], 0);
2337 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2339 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2341 operands[1] = XEXP (addr, 0);
2342 gcc_assert (GET_CODE (operands[0]) == REG
2343 && GET_CODE (operands[1]) == REG);
2345 if (!reg_overlap_mentioned_p (high_reg, addr))
2347 /* No overlap between high target register and address
2348 register. (We do this in a non-obvious way to
2349 save a register file writeback) */
2350 if (GET_CODE (addr) == POST_INC)
2351 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2352 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2356 /* This is an undefined situation. We should load into the
2357 address register *and* update that register. Probably
2358 we don't need to handle this at all. */
2359 if (GET_CODE (addr) == POST_INC)
2360 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2361 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2364 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2366 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2368 operands[1] = XEXP (addr, 0);
2369 gcc_assert (GET_CODE (operands[0]) == REG
2370 && GET_CODE (operands[1]) == REG);
2372 if (!reg_overlap_mentioned_p (high_reg, addr))
2374 /* No overlap between high target register and address
2375 register. (We do this in a non-obvious way to
2376 save a register file writeback) */
2377 if (GET_CODE (addr) == PRE_INC)
2378 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2379 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2383 /* This is an undefined situation. We should load into the
2384 address register *and* update that register. Probably
2385 we don't need to handle this at all. */
2386 if (GET_CODE (addr) == PRE_INC)
2387 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2388 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2391 else if (GET_CODE (addr) == PLUS
2392 && GET_CODE (XEXP (addr, 0)) == MULT)
2395 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2397 if (!reg_overlap_mentioned_p (high_reg, addr))
2399 xoperands[0] = high_reg;
2400 xoperands[1] = XEXP (addr, 1);
2401 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2402 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2403 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2405 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2409 xoperands[0] = high_reg;
2410 xoperands[1] = XEXP (addr, 1);
2411 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2412 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2413 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2415 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2420 /* If an operand is an unoffsettable memory ref, find a register
2421 we can increment temporarily to make it refer to the second word. */
2423 if (optype0 == MEMOP)
2424 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2426 if (optype1 == MEMOP)
2427 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2429 /* Ok, we can do one word at a time.
2430 Normally we do the low-numbered word first.
2432 In either case, set up in LATEHALF the operands to use
2433 for the high-numbered word and in some cases alter the
2434 operands in OPERANDS to be suitable for the low-numbered word. */
2436 if (optype0 == REGOP)
2437 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2438 else if (optype0 == OFFSOP)
2439 latehalf[0] = adjust_address (operands[0], SImode, 4);
2441 latehalf[0] = operands[0];
2443 if (optype1 == REGOP)
2444 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2445 else if (optype1 == OFFSOP)
2446 latehalf[1] = adjust_address (operands[1], SImode, 4);
2447 else if (optype1 == CNSTOP)
2448 split_double (operands[1], &operands[1], &latehalf[1]);
2450 latehalf[1] = operands[1];
2452 /* If the first move would clobber the source of the second one,
2453 do them in the other order.
2455 This can happen in two cases:
2457 mem -> register where the first half of the destination register
2458 is the same register used in the memory's address. Reload
2459 can create such insns.
2461 mem in this case will be either register indirect or register
2462 indirect plus a valid offset.
2464 register -> register move where REGNO(dst) == REGNO(src + 1)
2465 someone (Tim/Tege?) claimed this can happen for parameter loads.
2467 Handle mem -> register case first. */
2468 if (optype0 == REGOP
2469 && (optype1 == MEMOP || optype1 == OFFSOP)
2470 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2473 /* Do the late half first. */
2475 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2476 output_asm_insn (singlemove_string (latehalf), latehalf);
2480 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2481 return singlemove_string (operands);
2484 /* Now handle register -> register case. */
2485 if (optype0 == REGOP && optype1 == REGOP
2486 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2488 output_asm_insn (singlemove_string (latehalf), latehalf);
2489 return singlemove_string (operands);
2492 /* Normal case: do the two words, low-numbered first. */
2494 output_asm_insn (singlemove_string (operands), operands);
2496 /* Make any unoffsettable addresses point at high-numbered word. */
2498 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2500 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2503 output_asm_insn (singlemove_string (latehalf), latehalf);
2505 /* Undo the adds we just did. */
2507 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2509 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2515 output_fp_move_double (rtx *operands)
2517 if (FP_REG_P (operands[0]))
2519 if (FP_REG_P (operands[1])
2520 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2521 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2523 output_asm_insn ("fldd%F1 %1,%0", operands);
2525 else if (FP_REG_P (operands[1]))
2527 output_asm_insn ("fstd%F0 %1,%0", operands);
2533 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2535 /* This is a pain. You have to be prepared to deal with an
2536 arbitrary address here including pre/post increment/decrement.
2538 so avoid this in the MD. */
2539 gcc_assert (GET_CODE (operands[0]) == REG);
2541 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2542 xoperands[0] = operands[0];
2543 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2548 /* Return a REG that occurs in ADDR with coefficient 1.
2549 ADDR can be effectively incremented by incrementing REG. */
2552 find_addr_reg (rtx addr)
2554 while (GET_CODE (addr) == PLUS)
2556 if (GET_CODE (XEXP (addr, 0)) == REG)
2557 addr = XEXP (addr, 0);
2558 else if (GET_CODE (XEXP (addr, 1)) == REG)
2559 addr = XEXP (addr, 1);
2560 else if (CONSTANT_P (XEXP (addr, 0)))
2561 addr = XEXP (addr, 1);
2562 else if (CONSTANT_P (XEXP (addr, 1)))
2563 addr = XEXP (addr, 0);
2567 gcc_assert (GET_CODE (addr) == REG);
2571 /* Emit code to perform a block move.
2573 OPERANDS[0] is the destination pointer as a REG, clobbered.
2574 OPERANDS[1] is the source pointer as a REG, clobbered.
2575 OPERANDS[2] is a register for temporary storage.
2576 OPERANDS[3] is a register for temporary storage.
2577 OPERANDS[4] is the size as a CONST_INT
2578 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2579 OPERANDS[6] is another temporary register. */
2582 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2584 int align = INTVAL (operands[5]);
2585 unsigned long n_bytes = INTVAL (operands[4]);
2587 /* We can't move more than a word at a time because the PA
2588 has no longer integer move insns. (Could use fp mem ops?) */
2589 if (align > (TARGET_64BIT ? 8 : 4))
2590 align = (TARGET_64BIT ? 8 : 4);
2592 /* Note that we know each loop below will execute at least twice
2593 (else we would have open-coded the copy). */
2597 /* Pre-adjust the loop counter. */
2598 operands[4] = GEN_INT (n_bytes - 16);
2599 output_asm_insn ("ldi %4,%2", operands);
2602 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2603 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2604 output_asm_insn ("std,ma %3,8(%0)", operands);
2605 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2606 output_asm_insn ("std,ma %6,8(%0)", operands);
2608 /* Handle the residual. There could be up to 7 bytes of
2609 residual to copy! */
2610 if (n_bytes % 16 != 0)
2612 operands[4] = GEN_INT (n_bytes % 8);
2613 if (n_bytes % 16 >= 8)
2614 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2615 if (n_bytes % 8 != 0)
2616 output_asm_insn ("ldd 0(%1),%6", operands);
2617 if (n_bytes % 16 >= 8)
2618 output_asm_insn ("std,ma %3,8(%0)", operands);
2619 if (n_bytes % 8 != 0)
2620 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2625 /* Pre-adjust the loop counter. */
2626 operands[4] = GEN_INT (n_bytes - 8);
2627 output_asm_insn ("ldi %4,%2", operands);
2630 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2631 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2632 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2633 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2634 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2636 /* Handle the residual. There could be up to 7 bytes of
2637 residual to copy! */
2638 if (n_bytes % 8 != 0)
2640 operands[4] = GEN_INT (n_bytes % 4);
2641 if (n_bytes % 8 >= 4)
2642 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2643 if (n_bytes % 4 != 0)
2644 output_asm_insn ("ldw 0(%1),%6", operands);
2645 if (n_bytes % 8 >= 4)
2646 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2647 if (n_bytes % 4 != 0)
2648 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2653 /* Pre-adjust the loop counter. */
2654 operands[4] = GEN_INT (n_bytes - 4);
2655 output_asm_insn ("ldi %4,%2", operands);
2658 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2659 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2660 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2661 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2662 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2664 /* Handle the residual. */
2665 if (n_bytes % 4 != 0)
2667 if (n_bytes % 4 >= 2)
2668 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2669 if (n_bytes % 2 != 0)
2670 output_asm_insn ("ldb 0(%1),%6", operands);
2671 if (n_bytes % 4 >= 2)
2672 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2673 if (n_bytes % 2 != 0)
2674 output_asm_insn ("stb %6,0(%0)", operands);
2679 /* Pre-adjust the loop counter. */
2680 operands[4] = GEN_INT (n_bytes - 2);
2681 output_asm_insn ("ldi %4,%2", operands);
2684 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2685 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2686 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2687 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2688 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2690 /* Handle the residual. */
2691 if (n_bytes % 2 != 0)
2693 output_asm_insn ("ldb 0(%1),%3", operands);
2694 output_asm_insn ("stb %3,0(%0)", operands);
2703 /* Count the number of insns necessary to handle this block move.
2705 Basic structure is the same as emit_block_move, except that we
2706 count insns rather than emit them. */
2709 compute_movmem_length (rtx insn)
2711 rtx pat = PATTERN (insn);
2712 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2713 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2714 unsigned int n_insns = 0;
2716 /* We can't move more than four bytes at a time because the PA
2717 has no longer integer move insns. (Could use fp mem ops?) */
2718 if (align > (TARGET_64BIT ? 8 : 4))
2719 align = (TARGET_64BIT ? 8 : 4);
2721 /* The basic copying loop. */
2725 if (n_bytes % (2 * align) != 0)
2727 if ((n_bytes % (2 * align)) >= align)
2730 if ((n_bytes % align) != 0)
2734 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2738 /* Emit code to perform a block clear.
2740 OPERANDS[0] is the destination pointer as a REG, clobbered.
2741 OPERANDS[1] is a register for temporary storage.
2742 OPERANDS[2] is the size as a CONST_INT
2743 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2746 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2748 int align = INTVAL (operands[3]);
2749 unsigned long n_bytes = INTVAL (operands[2]);
2751 /* We can't clear more than a word at a time because the PA
2752 has no longer integer move insns. */
2753 if (align > (TARGET_64BIT ? 8 : 4))
2754 align = (TARGET_64BIT ? 8 : 4);
2756 /* Note that we know each loop below will execute at least twice
2757 (else we would have open-coded the copy). */
2761 /* Pre-adjust the loop counter. */
2762 operands[2] = GEN_INT (n_bytes - 16);
2763 output_asm_insn ("ldi %2,%1", operands);
2766 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2767 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2768 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2770 /* Handle the residual. There could be up to 7 bytes of
2771 residual to copy! */
2772 if (n_bytes % 16 != 0)
2774 operands[2] = GEN_INT (n_bytes % 8);
2775 if (n_bytes % 16 >= 8)
2776 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2777 if (n_bytes % 8 != 0)
2778 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2783 /* Pre-adjust the loop counter. */
2784 operands[2] = GEN_INT (n_bytes - 8);
2785 output_asm_insn ("ldi %2,%1", operands);
2788 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2789 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2790 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2792 /* Handle the residual. There could be up to 7 bytes of
2793 residual to copy! */
2794 if (n_bytes % 8 != 0)
2796 operands[2] = GEN_INT (n_bytes % 4);
2797 if (n_bytes % 8 >= 4)
2798 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2799 if (n_bytes % 4 != 0)
2800 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2805 /* Pre-adjust the loop counter. */
2806 operands[2] = GEN_INT (n_bytes - 4);
2807 output_asm_insn ("ldi %2,%1", operands);
2810 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2811 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2812 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2814 /* Handle the residual. */
2815 if (n_bytes % 4 != 0)
2817 if (n_bytes % 4 >= 2)
2818 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2819 if (n_bytes % 2 != 0)
2820 output_asm_insn ("stb %%r0,0(%0)", operands);
2825 /* Pre-adjust the loop counter. */
2826 operands[2] = GEN_INT (n_bytes - 2);
2827 output_asm_insn ("ldi %2,%1", operands);
2830 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2831 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2832 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2834 /* Handle the residual. */
2835 if (n_bytes % 2 != 0)
2836 output_asm_insn ("stb %%r0,0(%0)", operands);
2845 /* Count the number of insns necessary to handle this block move.
2847 Basic structure is the same as emit_block_move, except that we
2848 count insns rather than emit them. */
2851 compute_clrmem_length (rtx insn)
2853 rtx pat = PATTERN (insn);
2854 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2855 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2856 unsigned int n_insns = 0;
2858 /* We can't clear more than a word at a time because the PA
2859 has no longer integer move insns. */
2860 if (align > (TARGET_64BIT ? 8 : 4))
2861 align = (TARGET_64BIT ? 8 : 4);
2863 /* The basic loop. */
2867 if (n_bytes % (2 * align) != 0)
2869 if ((n_bytes % (2 * align)) >= align)
2872 if ((n_bytes % align) != 0)
2876 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2882 output_and (rtx *operands)
2884 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2886 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2887 int ls0, ls1, ms0, p, len;
2889 for (ls0 = 0; ls0 < 32; ls0++)
2890 if ((mask & (1 << ls0)) == 0)
2893 for (ls1 = ls0; ls1 < 32; ls1++)
2894 if ((mask & (1 << ls1)) != 0)
2897 for (ms0 = ls1; ms0 < 32; ms0++)
2898 if ((mask & (1 << ms0)) == 0)
2901 gcc_assert (ms0 == 32);
2909 operands[2] = GEN_INT (len);
2910 return "{extru|extrw,u} %1,31,%2,%0";
2914 /* We could use this `depi' for the case above as well, but `depi'
2915 requires one more register file access than an `extru'. */
2920 operands[2] = GEN_INT (p);
2921 operands[3] = GEN_INT (len);
2922 return "{depi|depwi} 0,%2,%3,%0";
2926 return "and %1,%2,%0";
2929 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2930 storing the result in operands[0]. */
2932 output_64bit_and (rtx *operands)
2934 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2936 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2937 int ls0, ls1, ms0, p, len;
2939 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2940 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2943 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2944 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2947 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2948 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2951 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2953 if (ls1 == HOST_BITS_PER_WIDE_INT)
2959 operands[2] = GEN_INT (len);
2960 return "extrd,u %1,63,%2,%0";
2964 /* We could use this `depi' for the case above as well, but `depi'
2965 requires one more register file access than an `extru'. */
2970 operands[2] = GEN_INT (p);
2971 operands[3] = GEN_INT (len);
2972 return "depdi 0,%2,%3,%0";
2976 return "and %1,%2,%0";
2980 output_ior (rtx *operands)
2982 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2983 int bs0, bs1, p, len;
2985 if (INTVAL (operands[2]) == 0)
2986 return "copy %1,%0";
2988 for (bs0 = 0; bs0 < 32; bs0++)
2989 if ((mask & (1 << bs0)) != 0)
2992 for (bs1 = bs0; bs1 < 32; bs1++)
2993 if ((mask & (1 << bs1)) == 0)
2996 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3001 operands[2] = GEN_INT (p);
3002 operands[3] = GEN_INT (len);
3003 return "{depi|depwi} -1,%2,%3,%0";
3006 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3007 storing the result in operands[0]. */
3009 output_64bit_ior (rtx *operands)
3011 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3012 int bs0, bs1, p, len;
3014 if (INTVAL (operands[2]) == 0)
3015 return "copy %1,%0";
3017 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3018 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3021 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3022 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3025 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3026 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3031 operands[2] = GEN_INT (p);
3032 operands[3] = GEN_INT (len);
3033 return "depdi -1,%2,%3,%0";
3036 /* Target hook for assembling integer objects. This code handles
3037 aligned SI and DI integers specially since function references
3038 must be preceded by P%. */
3041 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3043 if (size == UNITS_PER_WORD
3045 && function_label_operand (x, VOIDmode))
3047 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3048 output_addr_const (asm_out_file, x);
3049 fputc ('\n', asm_out_file);
3052 return default_assemble_integer (x, size, aligned_p);
/* Output an ascii string.  */

void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output = 0;
  unsigned char partial_output[16];     /* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      /* Process up to four source characters per chunk.  */
      int limit = (size - i < 4) ? size - i : 4;
      int co = 0;
      int io;

      for (io = 0; io < limit; io++)
        {
          unsigned int c = (unsigned char) p[i + io];

          /* Quote and backslash need a backslash escape.  */
          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              /* Non-printable: emit a two-digit \xNN escape.  */
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              partial_output[co++] = "0123456789abcdef"[c / 16];
              partial_output[co++] = "0123456789abcdef"[c % 16];
            }
        }

      /* Break the line before the assembler's input limit is hit.  */
      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }

  fputs ("\"\n", file);
}
3111 /* Try to rewrite floating point comparisons & branches to avoid
3112 useless add,tr insns.
3114 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3115 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3116 first attempt to remove useless add,tr insns. It is zero
3117 for the second pass as reorg sometimes leaves bogus REG_DEAD
3120 When CHECK_NOTES is zero we can only eliminate add,tr insns
3121 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3124 remove_useless_addtr_insns (int check_notes)
3127 static int pass = 0;
3129 /* This is fairly cheap, so always run it when optimizing. */
3133 int fbranch_count = 0;
3135 /* Walk all the insns in this function looking for fcmp & fbranch
3136 instructions. Keep track of how many of each we find. */
3137 for (insn = get_insns (); insn; insn = next_insn (insn))
3141 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3142 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3145 tmp = PATTERN (insn);
3147 /* It must be a set. */
3148 if (GET_CODE (tmp) != SET)
3151 /* If the destination is CCFP, then we've found an fcmp insn. */
3152 tmp = SET_DEST (tmp);
3153 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3159 tmp = PATTERN (insn);
3160 /* If this is an fbranch instruction, bump the fbranch counter. */
3161 if (GET_CODE (tmp) == SET
3162 && SET_DEST (tmp) == pc_rtx
3163 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3164 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3165 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3166 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3174 /* Find all floating point compare + branch insns. If possible,
3175 reverse the comparison & the branch to avoid add,tr insns. */
3176 for (insn = get_insns (); insn; insn = next_insn (insn))
3180 /* Ignore anything that isn't an INSN. */
3181 if (GET_CODE (insn) != INSN)
3184 tmp = PATTERN (insn);
3186 /* It must be a set. */
3187 if (GET_CODE (tmp) != SET)
3190 /* The destination must be CCFP, which is register zero. */
3191 tmp = SET_DEST (tmp);
3192 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3195 /* INSN should be a set of CCFP.
3197 See if the result of this insn is used in a reversed FP
3198 conditional branch. If so, reverse our condition and
3199 the branch. Doing so avoids useless add,tr insns. */
3200 next = next_insn (insn);
3203 /* Jumps, calls and labels stop our search. */
3204 if (GET_CODE (next) == JUMP_INSN
3205 || GET_CODE (next) == CALL_INSN
3206 || GET_CODE (next) == CODE_LABEL)
3209 /* As does another fcmp insn. */
3210 if (GET_CODE (next) == INSN
3211 && GET_CODE (PATTERN (next)) == SET
3212 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3213 && REGNO (SET_DEST (PATTERN (next))) == 0)
3216 next = next_insn (next);
3219 /* Is NEXT_INSN a branch? */
3221 && GET_CODE (next) == JUMP_INSN)
3223 rtx pattern = PATTERN (next);
3225 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3226 and CCFP dies, then reverse our conditional and the branch
3227 to avoid the add,tr. */
3228 if (GET_CODE (pattern) == SET
3229 && SET_DEST (pattern) == pc_rtx
3230 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3231 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3232 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3233 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3234 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3235 && (fcmp_count == fbranch_count
3237 && find_regno_note (next, REG_DEAD, 0))))
3239 /* Reverse the branch. */
3240 tmp = XEXP (SET_SRC (pattern), 1);
3241 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3242 XEXP (SET_SRC (pattern), 2) = tmp;
3243 INSN_CODE (next) = -1;
3245 /* Reverse our condition. */
3246 tmp = PATTERN (insn);
3247 PUT_CODE (XEXP (tmp, 1),
3248 (reverse_condition_maybe_unordered
3249 (GET_CODE (XEXP (tmp, 1)))));
3259 /* You may have trouble believing this, but this is the 32 bit HP-PA
3264 Variable arguments (optional; any number may be allocated)
3266 SP-(4*(N+9)) arg word N
3271 Fixed arguments (must be allocated; may remain unused)
3280 SP-32 External Data Pointer (DP)
3282 SP-24 External/stub RP (RP')
3286 SP-8 Calling Stub RP (RP'')
3291 SP-0 Stack Pointer (points to next available address)
3295 /* This function saves registers as follows. Registers marked with ' are
3296 this function's registers (as opposed to the previous function's).
3297 If a frame_pointer isn't needed, r4 is saved as a general register;
3298 the space for the frame pointer is still allocated, though, to keep
3304 SP (FP') Previous FP
3305 SP + 4 Alignment filler (sigh)
3306 SP + 8 Space for locals reserved here.
3310 SP + n All call saved register used.
3314 SP + o All call saved fp registers used.
3318 SP + p (SP') points to next available address.
3322 /* Global variables set by output_function_prologue(). */
3323 /* Size of frame. Need to know this to emit return insns from
3325 static HOST_WIDE_INT actual_fsize, local_fsize;
3326 static int save_fregs;
3328 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3329 Handle case where DISP > 8k by using the add_high_const patterns.
3331 Note in DISP > 8k case, we will leave the high part of the address
3332 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3335 store_reg (int reg, HOST_WIDE_INT disp, int base)
3337 rtx insn, dest, src, basereg;
3339 src = gen_rtx_REG (word_mode, reg);
3340 basereg = gen_rtx_REG (Pmode, base);
3341 if (VAL_14_BITS_P (disp))
3343 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3344 insn = emit_move_insn (dest, src);
3346 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3348 rtx delta = GEN_INT (disp);
3349 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3351 emit_move_insn (tmpreg, delta);
3352 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3356 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3357 gen_rtx_SET (VOIDmode, tmpreg,
3358 gen_rtx_PLUS (Pmode, basereg, delta)),
3360 RTX_FRAME_RELATED_P (insn) = 1;
3362 dest = gen_rtx_MEM (word_mode, tmpreg);
3363 insn = emit_move_insn (dest, src);
3367 rtx delta = GEN_INT (disp);
3368 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3369 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3371 emit_move_insn (tmpreg, high);
3372 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3373 insn = emit_move_insn (dest, src);
3377 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3378 gen_rtx_SET (VOIDmode,
3379 gen_rtx_MEM (word_mode,
3380 gen_rtx_PLUS (word_mode, basereg,
3388 RTX_FRAME_RELATED_P (insn) = 1;
3391 /* Emit RTL to store REG at the memory location specified by BASE and then
3392 add MOD to BASE. MOD must be <= 8k. */
3395 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3397 rtx insn, basereg, srcreg, delta;
3399 gcc_assert (VAL_14_BITS_P (mod));
3401 basereg = gen_rtx_REG (Pmode, base);
3402 srcreg = gen_rtx_REG (word_mode, reg);
3403 delta = GEN_INT (mod);
3405 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3408 RTX_FRAME_RELATED_P (insn) = 1;
3410 /* RTX_FRAME_RELATED_P must be set on each frame related set
3411 in a parallel with more than one element. */
3412 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3413 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3417 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3418 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3419 whether to add a frame note or not.
3421 In the DISP > 8k case, we leave the high part of the address in %r1.
3422 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3425 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3429 if (VAL_14_BITS_P (disp))
3431 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3432 plus_constant (gen_rtx_REG (Pmode, base), disp));
3434 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3436 rtx basereg = gen_rtx_REG (Pmode, base);
3437 rtx delta = GEN_INT (disp);
3438 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3440 emit_move_insn (tmpreg, delta);
3441 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3442 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3445 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3446 gen_rtx_SET (VOIDmode, tmpreg,
3447 gen_rtx_PLUS (Pmode, basereg, delta)),
3452 rtx basereg = gen_rtx_REG (Pmode, base);
3453 rtx delta = GEN_INT (disp);
3454 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3456 emit_move_insn (tmpreg,
3457 gen_rtx_PLUS (Pmode, basereg,
3458 gen_rtx_HIGH (Pmode, delta)));
3459 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3460 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3463 if (DO_FRAME_NOTES && note)
3464 RTX_FRAME_RELATED_P (insn) = 1;
3468 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3473 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3474 be consistent with the rounding and size calculation done here.
3475 Change them at the same time. */
3477 /* We do our own stack alignment. First, round the size of the
3478 stack locals up to a word boundary. */
3479 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3481 /* Space for previous frame pointer + filler. If any frame is
3482 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3483 waste some space here for the sake of HP compatibility. The
3484 first slot is only used when the frame pointer is needed. */
3485 if (size || frame_pointer_needed)
3486 size += STARTING_FRAME_OFFSET;
3488 /* If the current function calls __builtin_eh_return, then we need
3489 to allocate stack space for registers that will hold data for
3490 the exception handler. */
3491 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3495 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3497 size += i * UNITS_PER_WORD;
3500 /* Account for space used by the callee general register saves. */
3501 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3502 if (df_regs_ever_live_p (i))
3503 size += UNITS_PER_WORD;
3505 /* Account for space used by the callee floating point register saves. */
3506 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3507 if (df_regs_ever_live_p (i)
3508 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3512 /* We always save both halves of the FP register, so always
3513 increment the frame size by 8 bytes. */
3517 /* If any of the floating registers are saved, account for the
3518 alignment needed for the floating point register save block. */
3521 size = (size + 7) & ~7;
3526 /* The various ABIs include space for the outgoing parameters in the
3527 size of the current function's stack frame. We don't need to align
3528 for the outgoing arguments as their alignment is set by the final
3529 rounding for the frame as a whole. */
3530 size += current_function_outgoing_args_size;
3532 /* Allocate space for the fixed frame marker. This space must be
3533 allocated for any function that makes calls or allocates
3535 if (!current_function_is_leaf || size)
3536 size += TARGET_64BIT ? 48 : 32;
3538 /* Finally, round to the preferred stack boundary. */
3539 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3540 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3543 /* Generate the assembly code for function entry. FILE is a stdio
3544 stream to output the code to. SIZE is an int: how many units of
3545 temporary storage to allocate.
3547 Refer to the array `regs_ever_live' to determine which registers to
3548 save; `regs_ever_live[I]' is nonzero if register number I is ever
3549 used in the function. This function is responsible for knowing
3550 which registers should not be saved even if used. */
3552 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3553 of memory. If any fpu reg is used in the function, we allocate
3554 such a block here, at the bottom of the frame, just in case it's needed.
3556 If this function is a leaf procedure, then we may choose not
3557 to do a "save" insn. The decision about whether or not
3558 to do this is made in regclass.c. */
/* Emit the assembler directives that open an HPPA procedure: the function
   label, .PROC, the .CALLINFO unwind annotations (frame size, call/leaf
   status, register-save counts) and .ENTRY.  The prologue RTL itself is
   emitted by hppa_expand_prologue; this routine only prints directives.
   NOTE(review): this extraction is incomplete -- the embedded original
   line numbers jump (3567->3569, 3574->3576, ...), so the return type,
   braces, the `else` before ",CALLS", and the guards before ",SAVE_RP"
   and the ENTRY_GR/ENTRY_FR fprintfs are missing.  Verify against the
   full pa.c before editing.  */
3561 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3563 /* The function's label and associated .PROC must never be
3564 separated and must be output *after* any profiling declarations
3565 to avoid changing spaces/subspaces within a procedure. */
3566 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3567 fputs ("\t.PROC\n", file);
3569 /* hppa_expand_prologue does the dirty work now. We just need
3570 to output the assembler directives which denote the start
3572 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3573 if (current_function_is_leaf)
3574 fputs (",NO_CALLS", file);
/* NOTE(review): the `else` branch selecting ",CALLS" and the condition
   guarding ",SAVE_RP" (presumably RP-live/EH-return) are elided here.  */
3576 fputs (",CALLS", file);
3578 fputs (",SAVE_RP", file);
3580 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3581 at the beginning of the frame and that it is used as the frame
3582 pointer for the frame. We do this because our current frame
3583 layout doesn't conform to that specified in the HP runtime
3584 documentation and we need a way to indicate to programs such as
3585 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3586 isn't used by HP compilers but is supported by the assembler.
3587 However, SAVE_SP is supposed to indicate that the previous stack
3588 pointer has been saved in the frame marker. */
3589 if (frame_pointer_needed)
3590 fputs (",SAVE_SP", file);
3592 /* Pass on information about the number of callee register saves
3593 performed in the prologue.
3595 The compiler is supposed to pass the highest register number
3596 saved, the assembler then has to adjust that number before
3597 entering it into the unwind descriptor (to account for any
3598 caller saved registers with lower register numbers than the
3599 first callee saved register). */
/* NOTE(review): the guards (likely `if (gr_saved)` / `if (fr_saved)`)
   for these two fprintfs are elided in this extraction.  The +2 / +11
   offsets convert a save count to the highest saved register number.  */
3601 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3604 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3606 fputs ("\n\t.ENTRY\n", file);
/* Cleanup pass run here per the original source.  */
3608 remove_useless_addtr_insns (0);
/* Emit the RTL for the function prologue: save RP, allocate the local
   frame (optionally setting up %r3 as frame pointer, with separate small
   (<8k) and large (>=8k) frame sequences), save callee-saved general
   registers and EH-return data registers, and finally save the FP
   registers past a doubleword-aligned offset.  Frame notes are attached
   so unwind info can be generated.
   NOTE(review): this extraction is incomplete -- the embedded original
   line numbers jump throughout (e.g. 3616->3624, 3641->3647), so
   declarations (insn, base, i, tmpreg appear to be used before any
   visible declaration), braces, `else` keywords, and several conditions
   are missing.  Code is left byte-identical; consult the full pa.c.  */
3612 hppa_expand_prologue (void)
3614 int merge_sp_adjust_with_store = 0;
3615 HOST_WIDE_INT size = get_frame_size ();
3616 HOST_WIDE_INT offset;
3624 /* Compute total size for frame pointer, filler, locals and rounding to
3625 the next word boundary. Similar code appears in compute_frame_size
3626 and must be changed in tandem with this code. */
3627 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3628 if (local_fsize || frame_pointer_needed)
3629 local_fsize += STARTING_FRAME_OFFSET;
3631 actual_fsize = compute_frame_size (size, &save_fregs);
3633 /* Compute a few things we will use often. */
3634 tmpreg = gen_rtx_REG (word_mode, 1);
3636 /* Save RP first. The calling conventions manual states RP will
3637 always be stored into the caller's frame at sp - 20 or sp - 16
3638 depending on which ABI is in use. */
3639 if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
3641 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3647 /* Allocate the local frame and set up the frame pointer if needed. */
3648 if (actual_fsize != 0)
3650 if (frame_pointer_needed)
3652 /* Copy the old frame pointer temporarily into %r1. Set up the
3653 new stack pointer, then store away the saved old frame pointer
3654 into the stack at sp and at the same time update the stack
3655 pointer by actual_fsize bytes. Two versions, first
3656 handles small (<8k) frames. The second handles large (>=8k)
3658 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3660 RTX_FRAME_RELATED_P (insn) = 1;
3662 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3664 RTX_FRAME_RELATED_P (insn) = 1;
3666 if (VAL_14_BITS_P (actual_fsize))
3667 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3670 /* It is incorrect to store the saved frame pointer at *sp,
3671 then increment sp (writes beyond the current stack boundary).
3673 So instead use stwm to store at *sp and post-increment the
3674 stack pointer as an atomic operation. Then increment sp to
3675 finish allocating the new frame. */
3676 HOST_WIDE_INT adjust1 = 8192 - 64;
3677 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3679 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3680 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3684 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3685 we need to store the previous stack pointer (frame pointer)
3686 into the frame marker on targets that use the HP unwind
3687 library. This allows the HP unwind library to be used to
3688 unwind GCC frames. However, we are not fully compatible
3689 with the HP library because our frame layout differs from
3690 that specified in the HP runtime specification.
3692 We don't want a frame note on this instruction as the frame
3693 marker moves during dynamic stack allocation.
3695 This instruction also serves as a blockage to prevent
3696 register spills from being scheduled before the stack
3697 pointer is raised. This is necessary as we store
3698 registers using the frame pointer as a base register,
3699 and the frame pointer is set before sp is raised. */
3700 if (TARGET_HPUX_UNWIND_LIBRARY)
3702 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3703 GEN_INT (TARGET_64BIT ? -8 : -4));
3705 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3709 emit_insn (gen_blockage ());
3711 /* no frame pointer needed. */
3714 /* In some cases we can perform the first callee register save
3715 and allocating the stack frame at the same time. If so, just
3716 make a note of it and defer allocating the frame until saving
3717 the callee registers. */
3718 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3719 merge_sp_adjust_with_store = 1;
3720 /* Can not optimize. Adjust the stack frame by actual_fsize
3723 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3728 /* Normal register save.
3730 Do not save the frame pointer in the frame_pointer_needed case. It
3731 was done earlier. */
3732 if (frame_pointer_needed)
3734 offset = local_fsize;
3736 /* Saving the EH return data registers in the frame is the simplest
3737 way to get the frame unwind information emitted. We put them
3738 just before the general registers. */
3739 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3741 unsigned int i, regno;
/* NOTE(review): the loop header iterating i over the EH data registers
   (and its braces) is elided between lines 3741 and 3745.  */
3745 regno = EH_RETURN_DATA_REGNO (i);
3746 if (regno == INVALID_REGNUM)
3749 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3750 offset += UNITS_PER_WORD;
3754 for (i = 18; i >= 4; i--)
3755 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3757 store_reg (i, offset, FRAME_POINTER_REGNUM);
3758 offset += UNITS_PER_WORD;
3761 /* Account for %r3 which is saved in a special place. */
3764 /* No frame pointer needed. */
3767 offset = local_fsize - actual_fsize;
3769 /* Saving the EH return data registers in the frame is the simplest
3770 way to get the frame unwind information emitted. */
3771 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3773 unsigned int i, regno;
3777 regno = EH_RETURN_DATA_REGNO (i);
3778 if (regno == INVALID_REGNUM)
3781 /* If merge_sp_adjust_with_store is nonzero, then we can
3782 optimize the first save. */
3783 if (merge_sp_adjust_with_store)
3785 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3786 merge_sp_adjust_with_store = 0;
3789 store_reg (regno, offset, STACK_POINTER_REGNUM);
3790 offset += UNITS_PER_WORD;
3794 for (i = 18; i >= 3; i--)
3795 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3797 /* If merge_sp_adjust_with_store is nonzero, then we can
3798 optimize the first GR save. */
3799 if (merge_sp_adjust_with_store)
3801 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3802 merge_sp_adjust_with_store = 0;
3805 store_reg (i, offset, STACK_POINTER_REGNUM);
3806 offset += UNITS_PER_WORD;
3810 /* If we wanted to merge the SP adjustment with a GR save, but we never
3811 did any GR saves, then just emit the adjustment here. */
3812 if (merge_sp_adjust_with_store)
3813 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3817 /* The hppa calling conventions say that %r19, the pic offset
3818 register, is saved at sp - 32 (in this function's frame)
3819 when generating PIC code. FIXME: What is the correct thing
3820 to do for functions which make no calls and allocate no
3821 frame? Do we need to allocate a frame, or can we just omit
3822 the save? For now we'll just omit the save.
3824 We don't want a note on this insn as the frame marker can
3825 move if there is a dynamic stack allocation. */
3826 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3828 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3830 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3834 /* Align pointer properly (doubleword boundary). */
3835 offset = (offset + 7) & ~7;
3837 /* Floating point register store. */
3842 /* First get the frame or stack pointer to the start of the FP register
3844 if (frame_pointer_needed)
3846 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3847 base = frame_pointer_rtx;
3851 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3852 base = stack_pointer_rtx;
3855 /* Now actually save the FP registers. */
3856 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3858 if (df_regs_ever_live_p (i)
3859 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3861 rtx addr, insn, reg;
3862 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3863 reg = gen_rtx_REG (DFmode, i);
3864 insn = emit_move_insn (addr, reg);
3867 RTX_FRAME_RELATED_P (insn) = 1;
/* NOTE(review): the TARGET_64BIT/32-bit split that selects between one
   DFmode frame note and a pair of SFmode notes is elided around here;
   the two note-building arms below belong to different branches.  */
3870 rtx mem = gen_rtx_MEM (DFmode,
3871 plus_constant (base, offset));
3873 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3874 gen_rtx_SET (VOIDmode, mem, reg),
3879 rtx meml = gen_rtx_MEM (SFmode,
3880 plus_constant (base, offset));
3881 rtx memr = gen_rtx_MEM (SFmode,
3882 plus_constant (base, offset + 4));
3883 rtx regl = gen_rtx_REG (SFmode, i);
3884 rtx regr = gen_rtx_REG (SFmode, i + 1);
3885 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3886 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3889 RTX_FRAME_RELATED_P (setl) = 1;
3890 RTX_FRAME_RELATED_P (setr) = 1;
3891 vec = gen_rtvec (2, setl, setr);
3893 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3894 gen_rtx_SEQUENCE (VOIDmode, vec),
3898 offset += GET_MODE_SIZE (DFmode);
3905 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3906 Handle case where DISP > 8k by using the add_high_const patterns. */
/* Emit RTL to load word-mode register REG from memory at BASE+DISP.
   Three addressing strategies depending on DISP: a 14-bit displacement
   uses a plain base+offset MEM; a 64-bit displacement that does not fit
   in 32 bits is materialized in %r1 (with or without indexed addressing
   per TARGET_DISABLE_INDEXING); otherwise the add_high_const / LO_SUM
   pattern splits DISP into high and low parts via %r1.
   NOTE(review): the return type, `src` declaration, braces and the
   final `else` keywords are elided in this extraction (line numbers
   jump 3912->3915, 3929->3933); code left byte-identical.  */
3909 load_reg (int reg, HOST_WIDE_INT disp, int base)
3911 rtx dest = gen_rtx_REG (word_mode, reg);
3912 rtx basereg = gen_rtx_REG (Pmode, base);
3915 if (VAL_14_BITS_P (disp))
3916 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3917 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3919 rtx delta = GEN_INT (disp);
3920 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3922 emit_move_insn (tmpreg, delta);
3923 if (TARGET_DISABLE_INDEXING)
3925 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3926 src = gen_rtx_MEM (word_mode, tmpreg);
3929 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3933 rtx delta = GEN_INT (disp);
3934 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3935 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3937 emit_move_insn (tmpreg, high);
3938 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3941 emit_move_insn (dest, src);
3944 /* Update the total code bytes output to the text section. */
/* Add NBYTES to the running total of code bytes emitted to the text
   section, saturating to -1 (all-ones) on overflow or when insn
   addresses were never computed.  Only tracked for the default text
   section on non-GAS/non-SOM/portable-runtime configurations.
   NOTE(review): braces and `else` branches are elided in this
   extraction (line numbers jump 3960->3963); the final assignment is
   the fallback when INSN_ADDRESSES_SET_P () is false.  */
3947 update_total_code_bytes (int nbytes)
3949 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3950 && !IN_NAMED_SECTION_P (cfun->decl))
3952 if (INSN_ADDRESSES_SET_P ())
3954 unsigned long old_total = total_code_bytes;
3956 total_code_bytes += nbytes;
3958 /* Be prepared to handle overflows. */
3959 if (old_total > total_code_bytes)
3960 total_code_bytes = -1;
3963 total_code_bytes = -1;
3967 /* This function generates the assembly code for function exit.
3968 Args are as for output_function_prologue ().
3970 The function epilogue should not depend on the current stack
3971 pointer! It should use the frame pointer only. This is mandatory
3972 because of alloca; we also take advantage of it to omit stack
3973 adjustments before returning. */
/* Emit the assembler directives that close an HPPA procedure (.EXIT /
   .PROCEND), emit a trailing nop when the epilogue was entirely
   optimized away after a noreturn call (so %r2 still points into this
   function), and update the code-size bookkeeping used elsewhere.
   NOTE(review): braces and some statements are elided in this
   extraction (line numbers jump, e.g. 4003->4007, 4015->4018);
   code left byte-identical.  */
3976 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3978 rtx insn = get_last_insn ();
3982 /* hppa_expand_epilogue does the dirty work now. We just need
3983 to output the assembler directives which denote the end
3986 To make debuggers happy, emit a nop if the epilogue was completely
3987 eliminated due to a volatile call as the last insn in the
3988 current function. That way the return address (in %r2) will
3989 always point to a valid instruction in the current function. */
3991 /* Get the last real insn. */
3992 if (GET_CODE (insn) == NOTE)
3993 insn = prev_real_insn (insn);
3995 /* If it is a sequence, then look inside. */
3996 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3997 insn = XVECEXP (PATTERN (insn), 0, 0);
3999 /* If insn is a CALL_INSN, then it must be a call to a volatile
4000 function (otherwise there would be epilogue insns). */
4001 if (insn && GET_CODE (insn) == CALL_INSN)
4003 fputs ("\tnop\n", file);
4007 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4009 if (TARGET_SOM && TARGET_GAS)
4011 /* We're done with this subspace except possibly for some additional
4012 debug information. Forget that we are in this subspace to ensure
4013 that the next function is output in its own subspace. */
4015 cfun->machine->in_nsubspa = 2;
4018 if (INSN_ADDRESSES_SET_P ())
4020 insn = get_last_nonnote_insn ();
4021 last_address += INSN_ADDRESSES (INSN_UID (insn));
4023 last_address += insn_default_length (insn);
/* Round the function end up to the function alignment boundary.  */
4024 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4025 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4028 /* Finally, update the total number of code bytes output so far. */
4029 update_total_code_bytes (last_address);
/* Emit the RTL for the function epilogue: restore RP (early, to avoid a
   load/use interlock on the return bv), restore EH-return data registers
   and callee-saved general registers (optionally merging the stack
   deallocation with the first restore), restore FP registers, emit a
   blockage so the stack is not cut back before the restores finish,
   then reset the stack (and frame) pointer, and finally apply the
   EH_RETURN_STACKADJ adjustment for __builtin_eh_return.
   NOTE(review): this extraction is incomplete -- the embedded original
   line numbers jump throughout (e.g. 4046->4049, 4084->4088), so
   declarations (tmpreg, i), braces, `else` keywords and some guard
   conditions (e.g. the RP-live test before line 4049) are missing.
   Code left byte-identical; consult the full pa.c.  */
4033 hppa_expand_epilogue (void)
4036 HOST_WIDE_INT offset;
4037 HOST_WIDE_INT ret_off = 0;
4039 int merge_sp_adjust_with_load = 0;
4041 /* We will use this often. */
4042 tmpreg = gen_rtx_REG (word_mode, 1);
4044 /* Try to restore RP early to avoid load/use interlocks when
4045 RP gets used in the return (bv) instruction. This appears to still
4046 be necessary even when we schedule the prologue and epilogue. */
4049 ret_off = TARGET_64BIT ? -16 : -20;
4050 if (frame_pointer_needed)
4052 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4057 /* No frame pointer, and stack is smaller than 8k. */
4058 if (VAL_14_BITS_P (ret_off - actual_fsize))
4060 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4066 /* General register restores. */
4067 if (frame_pointer_needed)
4069 offset = local_fsize;
4071 /* If the current function calls __builtin_eh_return, then we need
4072 to restore the saved EH data registers. */
4073 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4075 unsigned int i, regno;
4079 regno = EH_RETURN_DATA_REGNO (i);
4080 if (regno == INVALID_REGNUM)
4083 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4084 offset += UNITS_PER_WORD;
4088 for (i = 18; i >= 4; i--)
4089 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4091 load_reg (i, offset, FRAME_POINTER_REGNUM);
4092 offset += UNITS_PER_WORD;
4097 offset = local_fsize - actual_fsize;
4099 /* If the current function calls __builtin_eh_return, then we need
4100 to restore the saved EH data registers. */
4101 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4103 unsigned int i, regno;
4107 regno = EH_RETURN_DATA_REGNO (i);
4108 if (regno == INVALID_REGNUM)
4111 /* Only for the first load.
4112 merge_sp_adjust_with_load holds the register load
4113 with which we will merge the sp adjustment. */
4114 if (merge_sp_adjust_with_load == 0
4116 && VAL_14_BITS_P (-actual_fsize))
4117 merge_sp_adjust_with_load = regno;
4119 load_reg (regno, offset, STACK_POINTER_REGNUM);
4120 offset += UNITS_PER_WORD;
4124 for (i = 18; i >= 3; i--)
4126 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4128 /* Only for the first load.
4129 merge_sp_adjust_with_load holds the register load
4130 with which we will merge the sp adjustment. */
4131 if (merge_sp_adjust_with_load == 0
4133 && VAL_14_BITS_P (-actual_fsize))
4134 merge_sp_adjust_with_load = i;
4136 load_reg (i, offset, STACK_POINTER_REGNUM);
4137 offset += UNITS_PER_WORD;
4142 /* Align pointer properly (doubleword boundary). */
4143 offset = (offset + 7) & ~7;
4145 /* FP register restores. */
4148 /* Adjust the register to index off of. */
4149 if (frame_pointer_needed)
4150 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4152 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4154 /* Actually do the restores now. */
4155 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4156 if (df_regs_ever_live_p (i)
4157 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4159 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4160 rtx dest = gen_rtx_REG (DFmode, i);
4161 emit_move_insn (dest, src);
4165 /* Emit a blockage insn here to keep these insns from being moved to
4166 an earlier spot in the epilogue, or into the main instruction stream.
4168 This is necessary as we must not cut the stack back before all the
4169 restores are finished. */
4170 emit_insn (gen_blockage ());
4172 /* Reset stack pointer (and possibly frame pointer). The stack
4173 pointer is initially set to fp + 64 to avoid a race condition. */
4174 if (frame_pointer_needed)
4176 rtx delta = GEN_INT (-64);
4178 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4179 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4181 /* If we were deferring a callee register restore, do it now. */
4182 else if (merge_sp_adjust_with_load)
4184 rtx delta = GEN_INT (-actual_fsize);
4185 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4187 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4189 else if (actual_fsize != 0)
4190 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4193 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4194 frame greater than 8k), do so now. */
4196 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4198 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4200 rtx sa = EH_RETURN_STACKADJ_RTX;
4202 emit_insn (gen_blockage ());
4203 emit_insn (TARGET_64BIT
4204 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4205 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return a pseudo holding the entry-time value of the PIC offset table
   register, via the hard-reg-initial-value machinery.
   NOTE(review): the return type line and braces are elided in this
   extraction; code left byte-identical.  */
4210 hppa_pic_save_rtx (void)
4212 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4215 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4216 #define NO_DEFERRED_PROFILE_COUNTERS 0
4220 /* Vector of funcdef numbers. */
4221 static VEC(int,heap) *funcdef_nos;
4223 /* Output deferred profile counters. */
/* Emit the deferred per-function profile counters recorded in
   funcdef_nos: switch to the data section, align, and for each recorded
   function number emit an "LP" internal label followed by a
   zero-initialized long-sized counter word, then free the vector.
   NOTE(review): the return type, declarations of i/n/align, an early
   return, and braces are elided in this extraction (line numbers jump
   4225->4230); code left byte-identical.  */
4225 output_deferred_profile_counters (void)
4230 if (VEC_empty (int, funcdef_nos))
4233 switch_to_section (data_section);
4234 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4235 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4237 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4239 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4240 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4243 VEC_free (int, heap, funcdef_nos);
/* Emit the profiling call sequence for -p: load the function's begin
   label address into %r25 with a relocation-free lcla/load_offset pair,
   put the return pointer in %r26, optionally point %r24 at a deferred
   "LP" counter, and call _mcount with the appropriate register usage
   notes.  A REG_EH_REGION note of -1 marks the call as non-throwing.
   NOTE(review): this extraction is incomplete -- braces, the TARGET_64BIT
   vs 32-bit split around gen_lcla2/gen_lcla1, parts of the
   ASM_GENERATE_INTERNAL_LABEL and gen_call argument lists, and the
   #else/#endif of the NO_DEFERRED_PROFILE_COUNTERS block are missing.
   Code left byte-identical; consult the full pa.c.  */
4247 hppa_profile_hook (int label_no)
4249 /* We use SImode for the address of the function in both 32 and
4250 64-bit code to avoid having to provide DImode versions of the
4251 lcla2 and load_offset_label_address insn patterns. */
4252 rtx reg = gen_reg_rtx (SImode);
4253 rtx label_rtx = gen_label_rtx ();
4254 rtx begin_label_rtx, call_insn;
4255 char begin_label_name[16];
4257 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4259 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4262 emit_move_insn (arg_pointer_rtx,
4263 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4266 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4268 /* The address of the function is loaded into %r25 with an instruction-
4269 relative sequence that avoids the use of relocations. The sequence
4270 is split so that the load_offset_label_address instruction can
4271 occupy the delay slot of the call to _mcount. */
4273 emit_insn (gen_lcla2 (reg, label_rtx));
4275 emit_insn (gen_lcla1 (reg, label_rtx));
4277 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4278 reg, begin_label_rtx, label_rtx));
4280 #if !NO_DEFERRED_PROFILE_COUNTERS
4282 rtx count_label_rtx, addr, r24;
4283 char count_label_name[16];
4285 VEC_safe_push (int, heap, funcdef_nos, label_no);
4286 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4287 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4289 addr = force_reg (Pmode, count_label_rtx);
4290 r24 = gen_rtx_REG (Pmode, 24);
4291 emit_move_insn (r24, addr);
4294 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4295 gen_rtx_SYMBOL_REF (Pmode,
4297 GEN_INT (TARGET_64BIT ? 24 : 12)));
4299 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4304 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4305 gen_rtx_SYMBOL_REF (Pmode,
4307 GEN_INT (TARGET_64BIT ? 16 : 8)));
4311 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4312 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4314 /* Indicate the _mcount call cannot throw, nor will it execute a
4316 REG_NOTES (call_insn)
4317 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4320 /* Fetch the return address for the frame COUNT steps up from
4321 the current frame, after the prologue. FRAMEADDR is the
4322 frame pointer of the COUNT frame.
4324 We want to ignore any export stub remnants here. To handle this,
4325 we examine the code at the return address, and if it is an export
4326 stub, we return a memory rtx for the stub return address stored
4329 The value returned is used in two different ways:
4331 1. To find a function's caller.
4333 2. To change the return address for a function.
4335 This function handles most instances of case 1; however, it will
4336 fail if there are two levels of stubs to execute on the return
4337 path. The only way I believe that can happen is if the return value
4338 needs a parameter relocation, which never happens for C code.
4340 This function handles most instances of case 2; however, it will
4341 fail if we did not originally have stub code on the return path
4342 but will need stub code on the new return path. This can happen if
4343 the caller & callee are both in the main program, but the new
4344 return location is in a shared library. */
/* Return an rtx for the return address of the frame COUNT steps up,
   given that frame's frame pointer FRAMEADDR.  Emits a runtime check of
   the four instruction words at the (privilege-masked) return address
   against the known export-stub sequence; if they match, the real
   return address is fetched from -24[frameaddr] instead of %r2.
   NOTE(review): this extraction is incomplete -- the return type,
   declarations of rp/saved_rp/ins/label, the COUNT != 0 early return,
   the TARGET_64BIT/NO_SPACE_REGS shortcut body, the final label emit
   and `return saved_rp`, and various braces are missing (line numbers
   jump 4347->4357, 4413->...).  Code left byte-identical.  */
4347 return_addr_rtx (int count, rtx frameaddr)
4357 rp = get_hard_reg_initial_val (Pmode, 2);
4359 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4362 saved_rp = gen_reg_rtx (Pmode);
4363 emit_move_insn (saved_rp, rp);
4365 /* Get pointer to the instruction stream. We have to mask out the
4366 privilege level from the two low order bits of the return address
4367 pointer here so that ins will point to the start of the first
4368 instruction that would have been executed if we returned. */
4369 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4370 label = gen_label_rtx ();
4372 /* Check the instruction stream at the normal return address for the
4375 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4376 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4377 0x00011820 | stub+16: mtsp r1,sr0
4378 0xe0400002 | stub+20: be,n 0(sr0,rp)
4380 If it is an export stub, then our return address is really in
4383 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4384 NULL_RTX, SImode, 1);
4385 emit_jump_insn (gen_bne (label));
4387 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4388 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4389 emit_jump_insn (gen_bne (label));
4391 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4392 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4393 emit_jump_insn (gen_bne (label));
4395 /* 0xe0400002 must be specified as -532676606 so that it won't be
4396 rejected as an invalid immediate operand on 64-bit hosts. */
4397 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4398 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);
4400 /* If there is no export stub then just use the value saved from
4401 the return pointer register. */
4403 emit_jump_insn (gen_bne (label));
4405 /* Here we know that our return address points to an export
4406 stub. We don't want to return the address of the export stub,
4407 but rather the return address of the export stub. That return
4408 address is stored at -24[frameaddr]. */
4410 emit_move_insn (saved_rp,
4412 memory_address (Pmode,
4413 plus_constant (frameaddr,
/* Emit a conditional branch to label OPERAND0 on floating-point
   condition CODE, testing the CCFP status register (fp reg 0).
   NOTE(review): the return type, braces, and the tail of the
   IF_THEN_ELSE (the fall-through pc arm and closing parens) are elided
   in this extraction (line numbers jump 4425->4427->4429); code left
   byte-identical.  */
4421 emit_bcond_fp (enum rtx_code code, rtx operand0)
4423 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4424 gen_rtx_IF_THEN_ELSE (VOIDmode,
4425 gen_rtx_fmt_ee (code,
4427 gen_rtx_REG (CCFPmode, 0),
4429 gen_rtx_LABEL_REF (VOIDmode, operand0),
/* Build (but do not emit) a SET of the CCFP status register (fp reg 0)
   to the floating-point comparison CODE of OPERAND0 and OPERAND1.
   NOTE(review): the return type line and braces are elided in this
   extraction; code left byte-identical.  */
4435 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4437 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4438 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4441 /* Adjust the cost of a scheduling dependency. Return the new cost of
4442 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* Scheduler hook: adjust the cost of the dependency LINK between INSN
   and DEP_INSN.  Only anti and output dependencies are adjusted (true
   dependencies are modeled with bypasses, and PA8000 is skipped); for
   fpload/fpalu consumers that overlap a preceding FP divide/sqrt (or
   arithmetic) op, the cost is derived from the producer's latency.
   NOTE(review): this extraction is incomplete -- braces, `return cost`
   / `return 0` statements, several TYPE_FP* case labels, and the
   REG_DEP_ANTI case label are elided (line numbers jump, e.g.
   4479->4481, 4483->4490).  Code left byte-identical.  */
4445 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4447 enum attr_type attr_type;
4449 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4450 true dependencies as they are described with bypasses now. */
4451 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4454 if (! recog_memoized (insn))
4457 attr_type = get_attr_type (insn);
4459 switch (REG_NOTE_KIND (link))
4462 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4465 if (attr_type == TYPE_FPLOAD)
4467 rtx pat = PATTERN (insn);
4468 rtx dep_pat = PATTERN (dep_insn);
4469 if (GET_CODE (pat) == PARALLEL)
4471 /* This happens for the fldXs,mb patterns. */
4472 pat = XVECEXP (pat, 0, 0);
4474 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4475 /* If this happens, we have to extend this to schedule
4476 optimally. Return 0 for now. */
4479 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4481 if (! recog_memoized (dep_insn))
4483 switch (get_attr_type (dep_insn))
4490 case TYPE_FPSQRTSGL:
4491 case TYPE_FPSQRTDBL:
4492 /* A fpload can't be issued until one cycle before a
4493 preceding arithmetic operation has finished if
4494 the target of the fpload is any of the sources
4495 (or destination) of the arithmetic operation. */
4496 return insn_default_latency (dep_insn) - 1;
4503 else if (attr_type == TYPE_FPALU)
4505 rtx pat = PATTERN (insn);
4506 rtx dep_pat = PATTERN (dep_insn);
4507 if (GET_CODE (pat) == PARALLEL)
4509 /* This happens for the fldXs,mb patterns. */
4510 pat = XVECEXP (pat, 0, 0);
4512 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4513 /* If this happens, we have to extend this to schedule
4514 optimally. Return 0 for now. */
4517 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4519 if (! recog_memoized (dep_insn))
4521 switch (get_attr_type (dep_insn))
4525 case TYPE_FPSQRTSGL:
4526 case TYPE_FPSQRTDBL:
4527 /* An ALU flop can't be issued until two cycles before a
4528 preceding divide or sqrt operation has finished if
4529 the target of the ALU flop is any of the sources
4530 (or destination) of the divide or sqrt operation. */
4531 return insn_default_latency (dep_insn) - 2;
4539 /* For other anti dependencies, the cost is 0. */
4542 case REG_DEP_OUTPUT:
4543 /* Output dependency; DEP_INSN writes a register that INSN writes some
4545 if (attr_type == TYPE_FPLOAD)
4547 rtx pat = PATTERN (insn);
4548 rtx dep_pat = PATTERN (dep_insn);
4549 if (GET_CODE (pat) == PARALLEL)
4551 /* This happens for the fldXs,mb patterns. */
4552 pat = XVECEXP (pat, 0, 0);
4554 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4555 /* If this happens, we have to extend this to schedule
4556 optimally. Return 0 for now. */
4559 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4561 if (! recog_memoized (dep_insn))
4563 switch (get_attr_type (dep_insn))
4570 case TYPE_FPSQRTSGL:
4571 case TYPE_FPSQRTDBL:
4572 /* A fpload can't be issued until one cycle before a
4573 preceding arithmetic operation has finished if
4574 the target of the fpload is the destination of the
4575 arithmetic operation.
4577 Exception: For PA7100LC, PA7200 and PA7300, the cost
4578 is 3 cycles, unless they bundle together. We also
4579 pay the penalty if the second insn is a fpload. */
4580 return insn_default_latency (dep_insn) - 1;
4587 else if (attr_type == TYPE_FPALU)
4589 rtx pat = PATTERN (insn);
4590 rtx dep_pat = PATTERN (dep_insn);
4591 if (GET_CODE (pat) == PARALLEL)
4593 /* This happens for the fldXs,mb patterns. */
4594 pat = XVECEXP (pat, 0, 0);
4596 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4597 /* If this happens, we have to extend this to schedule
4598 optimally. Return 0 for now. */
4601 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4603 if (! recog_memoized (dep_insn))
4605 switch (get_attr_type (dep_insn))
4609 case TYPE_FPSQRTSGL:
4610 case TYPE_FPSQRTDBL:
4611 /* An ALU flop can't be issued until two cycles before a
4612 preceding divide or sqrt operation has finished if
4613 the target of the ALU flop is also the target of
4614 the divide or sqrt operation. */
4615 return insn_default_latency (dep_insn) - 2;
4623 /* For other output dependencies, the cost is 0. */
4631 /* Adjust scheduling priorities. We use this to try and keep addil
4632 and the next use of %r1 close together. */
/* Scheduler hook: bump the priority of insns that use a LO_SUM of a
   (writable) symbolic address -- either directly as a source, or inside
   a MEM source/destination -- to keep an addil and the following use of
   %r1 close together.
   NOTE(review): the braces, the early-exit for a null single_set, the
   src/dest declarations, the priority adjustments inside each branch,
   and the final `return priority` are elided in this extraction (line
   numbers jump 4636->4640, 4644->4647).  Code left byte-identical.  */
4634 pa_adjust_priority (rtx insn, int priority)
4636 rtx set = single_set (insn);
4640 src = SET_SRC (set);
4641 dest = SET_DEST (set);
4642 if (GET_CODE (src) == LO_SUM
4643 && symbolic_operand (XEXP (src, 1), VOIDmode)
4644 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4647 else if (GET_CODE (src) == MEM
4648 && GET_CODE (XEXP (src, 0)) == LO_SUM
4649 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4650 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4653 else if (GET_CODE (dest) == MEM
4654 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4655 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4656 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4662 /* The 700 can only issue a single insn at a time.
4663 The 7XXX processors can issue two insns at a time.
4664 The 8000 can issue 4 insns at a time. */
/* Scheduler hook: number of insns each PA processor can issue per cycle
   (700: 1, 7xxx: 2, 8000: 4).
   NOTE(review): the return type, the `switch (pa_cpu)` header, braces,
   and any default case are elided in this extraction (line numbers jump
   4666->4670); code left byte-identical.  */
4666 pa_issue_rate (void)
4670 case PROCESSOR_700: return 1;
4671 case PROCESSOR_7100: return 2;
4672 case PROCESSOR_7100LC: return 2;
4673 case PROCESSOR_7200: return 2;
4674 case PROCESSOR_7300: return 2;
4675 case PROCESSOR_8000: return 4;
4684 /* Return any length adjustment needed by INSN which already has its length
4685 computed as LENGTH. Return zero if no adjustment is necessary.
4687 For the PA: function calls, millicode calls, and backwards short
4688 conditional branches with unfilled delay slots need an adjustment by +1
4689 (to account for the NOP which will be inserted into the instruction stream).
4691 Also compute the length of an inline block move here as it is too
4692 complicated to express as a length attribute in pa.md. */
/* Return the length adjustment for INSN (already measured as LENGTH):
   +4 (one nop) for calls, millicode calls, btable jumps and short
   backward conditional branches with unfilled delay slots, and the
   computed block move/clear lengths for the BLKmode patterns.
   NOTE(review): the adjustment returns (`return 4;` etc.), some guard
   conjuncts (e.g. the delay-slot-length tests at original lines 4733,
   4738, 4747) and the final `return 0` are elided in this extraction;
   code left byte-identical.  */
4694 pa_adjust_insn_length (rtx insn, int length)
4696 rtx pat = PATTERN (insn);
4698 /* Jumps inside switch tables which have unfilled delay slots need
4700 if (GET_CODE (insn) == JUMP_INSN
4701 && GET_CODE (pat) == PARALLEL
4702 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4704 /* Millicode insn with an unfilled delay slot. */
4705 else if (GET_CODE (insn) == INSN
4706 && GET_CODE (pat) != SEQUENCE
4707 && GET_CODE (pat) != USE
4708 && GET_CODE (pat) != CLOBBER
4709 && get_attr_type (insn) == TYPE_MILLI)
4711 /* Block move pattern. */
4712 else if (GET_CODE (insn) == INSN
4713 && GET_CODE (pat) == PARALLEL
4714 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4715 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4716 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4717 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4718 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4719 return compute_movmem_length (insn) - 4;
4720 /* Block clear pattern. */
4721 else if (GET_CODE (insn) == INSN
4722 && GET_CODE (pat) == PARALLEL
4723 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4724 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4725 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4726 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4727 return compute_clrmem_length (insn) - 4;
4728 /* Conditional branch with an unfilled delay slot. */
4729 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4731 /* Adjust a short backwards conditional with an unfilled delay slot. */
4732 if (GET_CODE (pat) == SET
4734 && ! forward_branch_p (insn))
4736 else if (GET_CODE (pat) == PARALLEL
4737 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4740 /* Adjust dbra insn with short backwards conditional branch with
4741 unfilled delay slot -- only for case where counter is in a
4742 general register. */
4743 else if (GET_CODE (pat) == PARALLEL
4744 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4745 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4746 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4748 && ! forward_branch_p (insn))
/* NOTE(review): elided listing -- the "switch (code)" header, many
   case labels, breaks and braces between the numbered lines are
   missing from view.  The fputs strings below are assembler output
   and must not be altered.  */
4756 /* Print operand X (an rtx) in assembler syntax to file FILE.
4757 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4758 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4761 print_operand (FILE *file, rtx x, int code)
4766 /* Output a 'nop' if there's nothing for the delay slot. */
4767 if (dbr_sequence_length () == 0)
4768 fputs ("\n\tnop", file);
4771 /* Output a nullification completer if there's nothing for the */
4772 /* delay slot or nullification is requested. */
4773 if (dbr_sequence_length () == 0 ||
4775 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4779 /* Print out the second register name of a register pair.
4780 I.e., R (6) => 7. */
4781 fputs (reg_names[REGNO (x) + 1], file);
4784 /* A register or zero. */
4786 || (x == CONST0_RTX (DFmode))
4787 || (x == CONST0_RTX (SFmode)))
4789 fputs ("%r0", file);
4795 /* A register or zero (floating point). */
4797 || (x == CONST0_RTX (DFmode))
4798 || (x == CONST0_RTX (SFmode)))
4800 fputs ("%fr0", file);
4809 xoperands[0] = XEXP (XEXP (x, 0), 0);
4810 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4811 output_global_address (file, xoperands[1], 0);
4812 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4816 case 'C': /* Plain (C)ondition */
4818 switch (GET_CODE (x))
4821 fputs ("=", file); break;
4823 fputs ("<>", file); break;
4825 fputs (">", file); break;
4827 fputs (">=", file); break;
4829 fputs (">>=", file); break;
4831 fputs (">>", file); break;
4833 fputs ("<", file); break;
4835 fputs ("<=", file); break;
4837 fputs ("<<=", file); break;
4839 fputs ("<<", file); break;
4844 case 'N': /* Condition, (N)egated */
4845 switch (GET_CODE (x))
4848 fputs ("<>", file); break;
4850 fputs ("=", file); break;
4852 fputs ("<=", file); break;
4854 fputs ("<", file); break;
4856 fputs ("<<", file); break;
4858 fputs ("<<=", file); break;
4860 fputs (">=", file); break;
4862 fputs (">", file); break;
4864 fputs (">>", file); break;
4866 fputs (">>=", file); break;
4871 /* For floating point comparisons. Note that the output
4872 predicates are the complement of the desired mode. The
4873 conditions for GT, GE, LT, LE and LTGT cause an invalid
4874 operation exception if the result is unordered and this
4875 exception is enabled in the floating-point status register. */
4877 switch (GET_CODE (x))
4880 fputs ("!=", file); break;
4882 fputs ("=", file); break;
4884 fputs ("!>", file); break;
4886 fputs ("!>=", file); break;
4888 fputs ("!<", file); break;
4890 fputs ("!<=", file); break;
4892 fputs ("!<>", file); break;
4894 fputs ("!?<=", file); break;
4896 fputs ("!?<", file); break;
4898 fputs ("!?>=", file); break;
4900 fputs ("!?>", file); break;
4902 fputs ("!?=", file); break;
4904 fputs ("!?", file); break;
4906 fputs ("?", file); break;
4911 case 'S': /* Condition, operands are (S)wapped. */
4912 switch (GET_CODE (x))
4915 fputs ("=", file); break;
4917 fputs ("<>", file); break;
4919 fputs ("<", file); break;
4921 fputs ("<=", file); break;
4923 fputs ("<<=", file); break;
4925 fputs ("<<", file); break;
4927 fputs (">", file); break;
4929 fputs (">=", file); break;
4931 fputs (">>=", file); break;
4933 fputs (">>", file); break;
4938 case 'B': /* Condition, (B)oth swapped and negate. */
4939 switch (GET_CODE (x))
4942 fputs ("<>", file); break;
4944 fputs ("=", file); break;
4946 fputs (">=", file); break;
4948 fputs (">", file); break;
4950 fputs (">>", file); break;
4952 fputs (">>=", file); break;
4954 fputs ("<=", file); break;
4956 fputs ("<", file); break;
4958 fputs ("<<", file); break;
4960 fputs ("<<=", file); break;
4966 gcc_assert (GET_CODE (x) == CONST_INT);
4967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4970 gcc_assert (GET_CODE (x) == CONST_INT);
4971 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4974 gcc_assert (GET_CODE (x) == CONST_INT);
4975 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4978 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4979 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4982 gcc_assert (GET_CODE (x) == CONST_INT);
4983 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4986 gcc_assert (GET_CODE (x) == CONST_INT);
4987 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4990 if (GET_CODE (x) == CONST_INT)
4995 switch (GET_CODE (XEXP (x, 0)))
4999 if (ASSEMBLER_DIALECT == 0)
5000 fputs ("s,mb", file);
5002 fputs (",mb", file);
5006 if (ASSEMBLER_DIALECT == 0)
5007 fputs ("s,ma", file);
5009 fputs (",ma", file);
5012 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5013 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5015 if (ASSEMBLER_DIALECT == 0)
5018 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5019 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5021 if (ASSEMBLER_DIALECT == 0)
5022 fputs ("x,s", file);
5026 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5030 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5036 output_global_address (file, x, 0);
5039 output_global_address (file, x, 1);
5041 case 0: /* Don't do anything special */
5046 compute_zdepwi_operands (INTVAL (x), op);
5047 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5053 compute_zdepdi_operands (INTVAL (x), op);
5054 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5058 /* We can get here from a .vtable_inherit due to our
5059 CONSTANT_ADDRESS_P rejecting perfectly good constant
5065 if (GET_CODE (x) == REG)
5067 fputs (reg_names [REGNO (x)], file);
5068 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5074 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5075 && (REGNO (x) & 1) == 0)
5078 else if (GET_CODE (x) == MEM)
5080 int size = GET_MODE_SIZE (GET_MODE (x));
5081 rtx base = NULL_RTX;
5082 switch (GET_CODE (XEXP (x, 0)))
5086 base = XEXP (XEXP (x, 0), 0);
5087 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5091 base = XEXP (XEXP (x, 0), 0);
5092 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5095 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5096 fprintf (file, "%s(%s)",
5097 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5098 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5099 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5100 fprintf (file, "%s(%s)",
5101 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5102 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5103 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5104 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5106 /* Because the REG_POINTER flag can get lost during reload,
5107 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5108 index and base registers in the combined move patterns. */
5109 rtx base = XEXP (XEXP (x, 0), 1);
5110 rtx index = XEXP (XEXP (x, 0), 0);
5112 fprintf (file, "%s(%s)",
5113 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5116 output_address (XEXP (x, 0));
5119 output_address (XEXP (x, 0));
5124 output_addr_const (file, x);
/* NOTE(review): elided listing -- braces, several case labels and the
   assignments to `sep' are missing from view.  ROUND_CONSTANT requests
   rounding of the offset for an LR field selector (see comment at
   5176 below).  */
5127 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5130 output_global_address (FILE *file, rtx x, int round_constant)
5133 /* Imagine (high (const (plus ...))). */
5134 if (GET_CODE (x) == HIGH)
5137 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5138 output_addr_const (file, x);
5139 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5141 output_addr_const (file, x);
5142 fputs ("-$global$", file);
5144 else if (GET_CODE (x) == CONST)
5146 const char *sep = "";
5147 int offset = 0; /* assembler wants -$global$ at end */
5148 rtx base = NULL_RTX;
5150 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5153 base = XEXP (XEXP (x, 0), 0);
5154 output_addr_const (file, base);
5157 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5163 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5166 base = XEXP (XEXP (x, 0), 1);
5167 output_addr_const (file, base);
5170 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5176 /* How bogus. The compiler is apparently responsible for
5177 rounding the constant if it uses an LR field selector.
5179 The linker and/or assembler seem a better place since
5180 they have to do this kind of thing already.
5182 If we fail to do this, HP's optimizing linker may eliminate
5183 an addil, but not update the ldw/stw/ldo instruction that
5184 uses the result of the addil. */
5186 offset = ((offset + 0x1000) & ~0x1fff);
5188 switch (GET_CODE (XEXP (x, 0)))
5201 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5209 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5210 fputs ("-$global$", file);
5212 fprintf (file, "%s%d", sep, offset);
5215 output_addr_const (file, x);
/* Emit the assembler .LEVEL directive matching the target PA
   architecture revision.  NOTE(review): elided listing -- the "static
   void" line, braces and the leading "if (TARGET_64BIT)" are missing
   from view.  */
5218 /* Output boilerplate text to appear at the beginning of the file.
5219 There are several possible versions. */
5220 #define aputs(x) fputs(x, asm_out_file)
5222 pa_file_start_level (void)
5225 aputs ("\t.LEVEL 2.0w\n");
5226 else if (TARGET_PA_20)
5227 aputs ("\t.LEVEL 2.0\n");
5228 else if (TARGET_PA_11)
5229 aputs ("\t.LEVEL 1.1\n");
5231 aputs ("\t.LEVEL 1.0\n");
/* Emit SOM .SPACE/.SUBSPA directives for the private data and text
   spaces.  NOTE(review): elided listing -- the declaration line,
   braces, and the conditional using SORTSPACE are missing from
   view.  */
5235 pa_file_start_space (int sortspace)
5237 aputs ("\t.SPACE $PRIVATE$");
5240 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5241 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5242 "\n\t.SPACE $TEXT$");
5245 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5246 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
/* Emit the source-file directive (and, when WANT_VERSION and
   debugging are enabled, a .version directive).  NOTE(review):
   elided listing -- declaration line and braces missing from view.  */
5250 pa_file_start_file (int want_version)
5252 if (write_symbols != NO_DEBUG)
5254 output_file_directive (asm_out_file, main_input_filename);
5256 aputs ("\t.version\t\"01.01\"\n");
/* Import _mcount with the given symbol kind when profiling
   (guard `if (profile_flag)' is elided from this listing).  */
5261 pa_file_start_mcount (const char *aswhat)
5264 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
/* TARGET_ASM_FILE_START hook for the ELF (non-SOM) configuration.  */
5268 pa_elf_file_start (void)
5270 pa_file_start_level ();
5271 pa_file_start_mcount ("ENTRY");
5272 pa_file_start_file (0);
/* TARGET_ASM_FILE_START hook for the SOM (HP-UX) configuration:
   spaces, $global$ and $$dyncall imports, then file directives.  */
5276 pa_som_file_start (void)
5278 pa_file_start_level ();
5279 pa_file_start_space (0);
5280 aputs ("\t.IMPORT $global$,DATA\n"
5281 "\t.IMPORT $$dyncall,MILLICODE\n");
5282 pa_file_start_mcount ("CODE");
5283 pa_file_start_file (0);
/* TARGET_ASM_FILE_START hook for the Linux configuration.  */
5287 pa_linux_file_start (void)
5289 pa_file_start_file (1);
5290 pa_file_start_level ();
5291 pa_file_start_mcount ("CODE");
/* TARGET_ASM_FILE_START hook for 64-bit HP-UX with GNU as; types
   _mcount as a function when the directive is available.  */
5295 pa_hpux64_gas_file_start (void)
5297 pa_file_start_level ();
5298 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5300 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5302 pa_file_start_file (1);
/* TARGET_ASM_FILE_START hook for 64-bit HP-UX with the HP
   assembler.  */
5306 pa_hpux64_hpas_file_start (void)
5308 pa_file_start_level ();
5309 pa_file_start_space (1);
5310 pa_file_start_mcount ("CODE");
5311 pa_file_start_file (0);
/* NOTE(review): elided listing -- local declarations (i, id), braces
   and the reallocation else-branch structure are partly missing from
   view.  */
5315 /* Search the deferred plabel list for SYMBOL and return its internal
5316 label. If an entry for SYMBOL is not found, a new entry is created. */
5319 get_deferred_plabel (rtx symbol)
5321 const char *fname = XSTR (symbol, 0);
5324 /* See if we have already put this function on the list of deferred
5325 plabels. This list is generally small, so a linear search is not
5326 too ugly. If it proves too slow replace it with something faster. */
5327 for (i = 0; i < n_deferred_plabels; i++)
5328 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5331 /* If the deferred plabel list is empty, or this entry was not found
5332 on the list, create a new entry on the list. */
5333 if (deferred_plabels == NULL || i == n_deferred_plabels)
5337 if (deferred_plabels == 0)
5338 deferred_plabels = (struct deferred_plabel *)
5339 ggc_alloc (sizeof (struct deferred_plabel));
5341 deferred_plabels = (struct deferred_plabel *)
5342 ggc_realloc (deferred_plabels,
5343 ((n_deferred_plabels + 1)
5344 * sizeof (struct deferred_plabel)));
5346 i = n_deferred_plabels++;
5347 deferred_plabels[i].internal_label = gen_label_rtx ();
5348 deferred_plabels[i].symbol = symbol;
5350 /* Gross. We have just implicitly taken the address of this
5351 function. Mark it in the same manner as assemble_name. */
5352 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5354 mark_referenced (id);
5357 return deferred_plabels[i].internal_label;
/* Emit the accumulated deferred plabels as aligned pointer-sized data
   at end of compilation.  NOTE(review): elided listing -- declaration
   line, "size_t i" and braces are missing from view.  */
5361 output_deferred_plabels (void)
5365 /* If we have some deferred plabels, then we need to switch into the
5366 data or readonly data section, and align it to a 4 byte boundary
5367 before outputting the deferred plabels. */
5368 if (n_deferred_plabels)
5370 switch_to_section (flag_pic ? data_section : readonly_data_section);
5371 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5374 /* Now output the deferred plabels. */
5375 for (i = 0; i < n_deferred_plabels; i++)
5377 targetm.asm_out.internal_label (asm_out_file, "L",
5378 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5379 assemble_integer (deferred_plabels[i].symbol,
5380 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
/* NOTE(review): the smin name "_U_Qmin" lacks the `f' that every
   sibling routine name has (cf. smax "_U_Qfmax") -- verify against
   the HP-UX libm quad-precision naming before "fixing"; the extra
   leading underscore on the 64-bit quad_to_sgl conversion also
   appears deliberate -- confirm.  */
5384 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5385 /* Initialize optabs to point to HPUX long double emulation routines. */
5387 pa_hpux_init_libfuncs (void)
5389 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5390 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5391 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5392 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5393 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5394 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5395 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5396 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5397 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5399 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5400 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5401 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5402 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5403 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5404 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5405 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5407 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5408 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5409 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5410 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5412 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5413 ? "__U_Qfcnvfxt_quad_to_sgl"
5414 : "_U_Qfcnvfxt_quad_to_sgl");
5415 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5416 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5417 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5419 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5420 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5421 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5422 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
/* Emit a one-time .IMPORT for the named millicode routine; the name
   is spliced over the "...." placeholder at MILLI_START in
   import_string (millicode names are at most 4 chars here).  */
5426 /* HP's millicode routines mean something special to the assembler.
5427 Keep track of which ones we have used. */
5429 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5430 static void import_milli (enum millicodes);
5431 static char imported[(int) end1000];
5432 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5433 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5434 #define MILLI_START 10
5437 import_milli (enum millicodes code)
5439 char str[sizeof (import_string)];
5441 if (!imported[(int) code])
5443 imported[(int) code] = 1;
5444 strcpy (str, import_string);
5445 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5446 output_asm_insn (str, 0);
/* Emit the $$mulI millicode call for a multiply insn; the unsignedp
   flag is unused because $$mulI handles both.  */
5450 /* The register constraints have put the operands and return value in
5451 the proper registers. */
5454 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5456 import_milli (mulI);
5457 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
/* NOTE(review): elided listing -- braces, the gen_rtx_PARALLEL/
   emit_insn wrapper around the rtvec, and the return statements are
   missing from view.  Registers 26/25 are the millicode argument
   regs, 29 the result, and 31 (or 2 on 64-bit) the return pointer.  */
5460 /* Emit the rtl for doing a division by a constant. */
5462 /* Do magic division millicodes exist for this value? */
5463 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5465 /* We'll use an array to keep track of the magic millicodes and
5466 whether or not we've used them already. [n][0] is signed, [n][1] is
5469 static int div_milli[16][2];
5472 emit_hpdiv_const (rtx *operands, int unsignedp)
5474 if (GET_CODE (operands[2]) == CONST_INT
5475 && INTVAL (operands[2]) > 0
5476 && INTVAL (operands[2]) < 16
5477 && magic_milli[INTVAL (operands[2])])
5479 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5481 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5485 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5486 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5488 gen_rtx_REG (SImode, 26),
5490 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5491 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5492 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5493 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5494 gen_rtx_CLOBBER (VOIDmode, ret))));
5495 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Emit a divide: use a magic $$divI_n/$$divU_n millicode when the
   divisor is one of the supported small constants, otherwise the
   generic $$divI/$$divU.  NOTE(review): elided listing -- braces and
   the if/else structure around the unsignedp tests are partly
   missing from view.  */
5502 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5506 /* If the divisor is a constant, try to use one of the special
5508 if (GET_CODE (operands[0]) == CONST_INT)
5510 static char buf[100];
5511 divisor = INTVAL (operands[0]);
5512 if (!div_milli[divisor][unsignedp])
5514 div_milli[divisor][unsignedp] = 1;
5516 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5518 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5522 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5523 INTVAL (operands[0]));
5524 return output_millicode_call (insn,
5525 gen_rtx_SYMBOL_REF (SImode, buf));
5529 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5530 INTVAL (operands[0]));
5531 return output_millicode_call (insn,
5532 gen_rtx_SYMBOL_REF (SImode, buf));
5535 /* Divisor isn't a special constant. */
5540 import_milli (divU);
5541 return output_millicode_call (insn,
5542 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5546 import_milli (divI);
5547 return output_millicode_call (insn,
5548 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
/* Emit the $$remU or $$remI millicode call for a modulo insn,
   selected by UNSIGNEDP (guard structure elided from this
   listing).  */
5553 /* Output a $$rem millicode to do mod. */
5556 output_mod_insn (int unsignedp, rtx insn)
5560 import_milli (remU);
5561 return output_millicode_call (insn,
5562 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5566 import_milli (remI);
5567 return output_millicode_call (insn,
5568 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the SOM .CALL argument-relocation descriptor for CALL_INSN by
   scanning its FUNCTION_USAGE list: GRs 23-26 and FRs 32-39 map to
   the four ARGW slots.  NOTE(review): elided listing -- braces, the
   arg_regs initialization loop body and some declarations are
   missing from view.  */
5573 output_arg_descriptor (rtx call_insn)
5575 const char *arg_regs[4];
5576 enum machine_mode arg_mode;
5578 int i, output_flag = 0;
5581 /* We neither need nor want argument location descriptors for the
5582 64bit runtime environment or the ELF32 environment. */
5583 if (TARGET_64BIT || TARGET_ELF32)
5586 for (i = 0; i < 4; i++)
5589 /* Specify explicitly that no argument relocations should take place
5590 if using the portable runtime calling conventions. */
5591 if (TARGET_PORTABLE_RUNTIME)
5593 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5598 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5599 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5600 link; link = XEXP (link, 1))
5602 rtx use = XEXP (link, 0);
5604 if (! (GET_CODE (use) == USE
5605 && GET_CODE (XEXP (use, 0)) == REG
5606 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5609 arg_mode = GET_MODE (XEXP (use, 0));
5610 regno = REGNO (XEXP (use, 0));
5611 if (regno >= 23 && regno <= 26)
5613 arg_regs[26 - regno] = "GR";
5614 if (arg_mode == DImode)
5615 arg_regs[25 - regno] = "GR";
5617 else if (regno >= 32 && regno <= 39)
5619 if (arg_mode == SFmode)
5620 arg_regs[(regno - 32) / 2] = "FR";
5623 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5624 arg_regs[(regno - 34) / 2] = "FR";
5625 arg_regs[(regno - 34) / 2 + 1] = "FU";
5627 arg_regs[(regno - 34) / 2] = "FU";
5628 arg_regs[(regno - 34) / 2 + 1] = "FR";
5633 fputs ("\t.CALL ", asm_out_file);
5634 for (i = 0; i < 4; i++)
5639 fputc (',', asm_out_file);
5640 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5643 fputc ('\n', asm_out_file);
/* TARGET_SECONDARY_RELOAD hook.  Decides whether reloading X into/out
   of CLASS in MODE needs a scratch register or special reload
   pattern, communicated through SRI.  NOTE(review): elided listing --
   the initial regno computation, several return statements and braces
   are missing from view.  */
5646 static enum reg_class
5647 pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
5648 enum machine_mode mode, secondary_reload_info *sri)
5650 int is_symbolic, regno;
5652 /* Handle the easy stuff first. */
5653 if (class == R1_REGS)
5659 if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5665 /* If we have something like (mem (mem (...)), we can safely assume the
5666 inner MEM will end up in a general register after reloading, so there's
5667 no need for a secondary reload. */
5668 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5671 /* Trying to load a constant into a FP register during PIC code
5672 generation requires %r1 as a scratch register. */
5674 && (mode == SImode || mode == DImode)
5675 && FP_REG_CLASS_P (class)
5676 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5678 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5679 : CODE_FOR_reload_indi_r1);
5683 /* Profiling showed the PA port spends about 1.3% of its compilation
5684 time in true_regnum from calls inside pa_secondary_reload_class. */
5685 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5686 regno = true_regnum (x);
5688 /* Handle out of range displacement for integer mode loads/stores of
5690 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5691 && GET_MODE_CLASS (mode) == MODE_INT
5692 && FP_REG_CLASS_P (class))
5693 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5695 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5699 /* A SAR<->FP register copy requires a secondary register (GPR) as
5700 well as secondary memory. */
5701 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5702 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5703 || (class == SHIFT_REGS
5704 && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5706 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5710 /* Secondary reloads of symbolic operands require %r1 as a scratch
5711 register when we're generating PIC code and the operand isn't
5713 if (GET_CODE (x) == HIGH)
5716 /* Profiling has showed GCC spends about 2.6% of its compilation
5717 time in symbolic_operand from calls inside pa_secondary_reload_class.
5718 So, we use an inline copy to avoid useless work. */
5719 switch (GET_CODE (x))
5724 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5731 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5732 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5733 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5734 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5741 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5743 gcc_assert (mode == SImode || mode == DImode);
5744 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5745 : CODE_FOR_reload_indi_r1);
/* (Guard `if (TARGET_64BIT)' around the bitmap_set_bit is elided from
   this listing.)  */
5751 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5752 is only marked as live on entry by df-scan when it is a fixed
5753 register. It isn't a fixed register in the 64-bit runtime,
5754 so we need to mark it here. */
5757 pa_extra_live_on_entry (bitmap regs)
5760 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
/* Build the MEM at fp-16 (64-bit) or fp-20 (32-bit) that holds the EH
   return handler.  NOTE(review): the MEM_VOLATILE_P marking and the
   return statement are elided from this listing.  */
5763 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5764 to prevent it from being deleted. */
5767 pa_eh_return_handler_rtx (void)
5771 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5772 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5773 tmp = gen_rtx_MEM (word_mode, tmp);
/* (The `if (type)' guard selecting between int_size_in_bytes and
   GET_MODE_SIZE is elided from this listing.)  */
5778 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5779 by invisible reference. As a GCC extension, we also pass anything
5780 with a zero or variable size by reference.
5782 The 64-bit runtime does not describe passing any types by invisible
5783 reference. The internals of GCC can't currently handle passing
5784 empty structures, and zero or variable length arrays when they are
5785 not passed entirely on the stack or by reference. Thus, as a GCC
5786 extension, we pass these types by reference. The HP compiler doesn't
5787 support these types, so hopefully there shouldn't be any compatibility
5788 issues. This may have to be revisited when HP releases a C99 compiler
5789 or updates the ABI. */
5792 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5793 enum machine_mode mode, tree type,
5794 bool named ATTRIBUTE_UNUSED)
5799 size = int_size_in_bytes (type);
5801 size = GET_MODE_SIZE (mode);
5806 return size <= 0 || size > 8;
/* Decide upward/downward/no padding for an argument of MODE/TYPE.
   NOTE(review): elided listing -- the `enum direction' return type,
   the returned upward/downward/none values and braces are missing
   from view.  */
5810 function_arg_padding (enum machine_mode mode, tree type)
5813 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5815 /* Return none if justification is not required. */
5817 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5818 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5821 /* The directions set here are ignored when a BLKmode argument larger
5822 than a word is placed in a register. Different code is used for
5823 the stack and registers. This makes it difficult to have a
5824 consistent data representation for both the stack and registers.
5825 For both runtimes, the justification and padding for arguments on
5826 the stack and in registers should be identical. */
5828 /* The 64-bit runtime specifies left justification for aggregates. */
5831 /* The 32-bit runtime architecture specifies right justification.
5832 When the argument is passed on the stack, the argument is padded
5833 with garbage on the left. The HP compiler pads with zeros. */
5837 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
/* NOTE(review): elided listing -- the TARGET_64BIT/32-bit split and
   several declarations (offset, dest, i, off) are missing from view;
   the two copy_to_reg returns belong to the 64-bit and 32-bit paths
   respectively.  */
5844 /* Do what is necessary for `va_start'. We look at the current function
5845 to determine if stdargs or varargs is used and fill in an initial
5846 va_list. A pointer to this constructor is returned. */
5849 hppa_builtin_saveregs (void)
5852 tree fntype = TREE_TYPE (current_function_decl);
5853 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5854 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5855 != void_type_node)))
5856 ? UNITS_PER_WORD : 0);
5859 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5861 offset = current_function_arg_offset_rtx;
5867 /* Adjust for varargs/stdarg differences. */
5869 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5871 offset = current_function_arg_offset_rtx;
5873 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5874 from the incoming arg pointer and growing to larger addresses. */
5875 for (i = 26, off = -64; i >= 19; i--, off += 8)
5876 emit_move_insn (gen_rtx_MEM (word_mode,
5877 plus_constant (arg_pointer_rtx, off)),
5878 gen_rtx_REG (word_mode, i));
5880 /* The incoming args pointer points just beyond the flushback area;
5881 normally this is not a serious concern. However, when we are doing
5882 varargs/stdargs we want to make the arg pointer point to the start
5883 of the incoming argument area. */
5884 emit_move_insn (virtual_incoming_args_rtx,
5885 plus_constant (arg_pointer_rtx, -64));
5887 /* Now return a pointer to the first anonymous argument. */
5888 return copy_to_reg (expand_binop (Pmode, add_optab,
5889 virtual_incoming_args_rtx,
5890 offset, 0, 0, OPTAB_LIB_WIDEN));
5893 /* Store general registers on the stack. */
5894 dest = gen_rtx_MEM (BLKmode,
5895 plus_constant (current_function_internal_arg_pointer,
5897 set_mem_alias_set (dest, get_varargs_alias_set ());
5898 set_mem_align (dest, BITS_PER_WORD);
5899 move_block_from_reg (23, dest, 4);
5901 /* move_block_from_reg will emit code to store the argument registers
5902 individually as scalar stores.
5904 However, other insns may later load from the same addresses for
5905 a structure load (passing a struct to a varargs routine).
5907 The alias code assumes that such aliasing can never happen, so we
5908 have to keep memory referencing insns from moving up beyond the
5909 last argument register store. So we emit a blockage insn here. */
5910 emit_insn (gen_blockage ());
5912 return copy_to_reg (expand_binop (Pmode, add_optab,
5913 current_function_internal_arg_pointer,
5914 offset, 0, 0, OPTAB_LIB_WIDEN));
/* TARGET_EXPAND_BUILTIN_VA_START hook: save the argument registers,
   then delegate to the generic va_start expander.  */
5918 hppa_va_start (tree valist, rtx nextarg)
5920 nextarg = expand_builtin_saveregs ();
5921 std_expand_builtin_va_start (valist, nextarg);
/* TARGET_GIMPLIFY_VA_ARG_EXPR hook.  NOTE(review): elided listing --
   the TARGET_64BIT guard, several gimplify calls and the final return
   are missing from view; the 32-bit path handles downward-growing
   args with alignment and small-size offset adjustment.  */
5925 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5929 /* Args grow upward. We can use the generic routines. */
5930 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5932 else /* !TARGET_64BIT */
5934 tree ptr = build_pointer_type (type);
5937 unsigned int size, ofs;
5940 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5944 ptr = build_pointer_type (type);
5946 size = int_size_in_bytes (type);
5947 valist_type = TREE_TYPE (valist);
5949 /* Args grow down. Not handled by generic routines. */
5951 u = fold_convert (sizetype, size_in_bytes (type));
5952 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5953 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
5955 /* Copied from va-pa.h, but we probably don't need to align to
5956 word size, since we generate and preserve that invariant. */
5957 u = size_int (size > 4 ? -8 : -4);
5958 t = fold_convert (sizetype, t);
5959 t = build2 (BIT_AND_EXPR, sizetype, t, u);
5960 t = fold_convert (valist_type, t);
5962 t = build2 (MODIFY_EXPR, valist_type, valist, t);
5964 ofs = (8 - size) % 4;
5968 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
5971 t = fold_convert (ptr, t);
5972 t = build_va_arg_indirect_ref (t);
5975 t = build_va_arg_indirect_ref (t);
/* NOTE(review): elided listing -- the "return true"/"return false"
   lines after each precision test, the MODE_INT case label and the
   default case are missing from view.  */
5981 /* True if MODE is valid for the target. By "valid", we mean able to
5982 be manipulated in non-trivial ways. In particular, this means all
5983 the arithmetic is supported.
5985 Currently, TImode is not valid as the HP 64-bit runtime documentation
5986 doesn't document the alignment and calling conventions for this type.
5987 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5988 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
5991 pa_scalar_mode_supported_p (enum machine_mode mode)
5993 int precision = GET_MODE_PRECISION (mode);
5995 switch (GET_MODE_CLASS (mode))
5997 case MODE_PARTIAL_INT:
5999 if (precision == CHAR_TYPE_SIZE)
6001 if (precision == SHORT_TYPE_SIZE)
6003 if (precision == INT_TYPE_SIZE)
6005 if (precision == LONG_TYPE_SIZE)
6007 if (precision == LONG_LONG_TYPE_SIZE)
6012 if (precision == FLOAT_TYPE_SIZE)
6014 if (precision == DOUBLE_TYPE_SIZE)
6016 if (precision == LONG_DOUBLE_TYPE_SIZE)
6020 case MODE_DECIMAL_FLOAT:
6028 /* This routine handles all the normal conditional branch sequences we
6029 might need to generate. It handles compare immediate vs compare
6030 register, nullification of delay slots, varying length branches,
6031 negated branches, and all combinations of the above. It returns the
6032 output appropriate to emit the branch corresponding to all given
/* NOTE(review): elided extract -- several conditional arms, local
   declarations (e.g. `useskip', `xdelay') and closing braces are missing
   from this view.  Code below must not be restructured without the full
   source. */
6036 output_cbranch (rtx *operands, int negated, rtx insn)
6038 static char buf[100];
6040 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6041 int length = get_attr_length (insn);
6044 /* A conditional branch to the following instruction (e.g. the delay slot)
6045 is asking for a disaster. This can happen when not optimizing and
6046 when jump optimization fails.
6048 While it is usually safe to emit nothing, this can fail if the
6049 preceding instruction is a nullified branch with an empty delay
6050 slot and the same branch target as this branch. We could check
6051 for this but jump optimization should eliminate nop jumps. It
6052 is always safe to emit a nop. */
6053 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6056 /* The doubleword form of the cmpib instruction doesn't have the LEU
6057 and GTU conditions while the cmpb instruction does. Since we accept
6058 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6059 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6060 operands[2] = gen_rtx_REG (DImode, 0)\u003b
6061 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6062 operands[1] = gen_rtx_REG (DImode, 0);
6064 /* If this is a long branch with its delay slot unfilled, set `nullify'
6065 as it can nullify the delay slot and save a nop. */
6066 if (length == 8 && dbr_sequence_length () == 0)
6069 /* If this is a short forward conditional branch which did not get
6070 its delay slot filled, the delay slot can still be nullified. */
6071 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6072 nullify = forward_branch_p (insn);
6074 /* A forward branch over a single nullified insn can be done with a
6075 comclr instruction. This avoids a single cycle penalty due to
6076 mis-predicted branch if we fall through (branch not taken). */
6078 && next_real_insn (insn) != 0
6079 && get_attr_length (next_real_insn (insn)) == 4
6080 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6086 /* All short conditional branches except backwards with an unfilled
/* The `{som|gas}' bracket syntax in the templates selects between the
   HP (com*) and GAS (cmp*) mnemonic spellings at output time. */
6090 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6092 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6093 if (GET_MODE (operands[1]) == DImode)
6096 strcat (buf, "%B3");
6098 strcat (buf, "%S3");
6100 strcat (buf, " %2,%r1,%%r0");
6102 strcat (buf, ",n %2,%r1,%0");
6104 strcat (buf, " %2,%r1,%0");
6107 /* All long conditionals. Note a short backward branch with an
6108 unfilled delay slot is treated just like a long backward branch
6109 with an unfilled delay slot. */
6111 /* Handle weird backwards branch with a filled delay slot
6112 which is nullified. */
6113 if (dbr_sequence_length () != 0
6114 && ! forward_branch_p (insn)
6117 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6118 if (GET_MODE (operands[1]) == DImode)
6121 strcat (buf, "%S3");
6123 strcat (buf, "%B3");
6124 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6126 /* Handle short backwards branch with an unfilled delay slot.
6127 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6128 taken and untaken branches. */
6129 else if (dbr_sequence_length () == 0
6130 && ! forward_branch_p (insn)
6131 && INSN_ADDRESSES_SET_P ()
6132 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6133 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6135 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6136 if (GET_MODE (operands[1]) == DImode)
6139 strcat (buf, "%B3 %2,%r1,%0%#");
6141 strcat (buf, "%S3 %2,%r1,%0%#");
6145 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6146 if (GET_MODE (operands[1]) == DImode)
6149 strcat (buf, "%S3");
6151 strcat (buf, "%B3");
6153 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6155 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6160 /* The reversed conditional branch must branch over one additional
6161 instruction if the delay slot is filled and needs to be extracted
6162 by output_lbranch. If the delay slot is empty or this is a
6163 nullified forward branch, the instruction after the reversed
6164 condition branch must be nullified. */
6165 if (dbr_sequence_length () == 0
6166 || (nullify && forward_branch_p (insn)))
6170 operands[4] = GEN_INT (length);
6175 operands[4] = GEN_INT (length + 4);
6178 /* Create a reversed conditional branch which branches around
6179 the following insns. */
6180 if (GET_MODE (operands[1]) != DImode)
6186 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6189 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6195 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6198 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6207 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6210 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6216 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6219 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
/* Emit the skip-around compare, then let output_lbranch produce the
   actual long branch to the real target. */
6223 output_asm_insn (buf, operands);
6224 return output_lbranch (operands[0], insn, xdelay);
6229 /* This routine handles output of long unconditional branches that
6230 exceed the maximum range of a simple branch instruction. Since
6231 we don't have a register available for the branch, we save register
6232 %r1 in the frame marker, load the branch destination DEST into %r1,
6233 execute the branch, and restore %r1 in the delay slot of the branch.
6235 Since long branches may have an insn in the delay slot and the
6236 delay slot is used to restore %r1, we in general need to extract
6237 this insn and execute it before the branch. However, to facilitate
6238 use of this function by conditional branches, we also provide an
6239 option to not extract the delay insn so that it will be emitted
6240 after the long branch. So, if there is an insn in the delay slot,
6241 it is extracted if XDELAY is nonzero.
6243 The lengths of the various long-branch sequences are 20, 16 and 24
6244 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
/* NOTE(review): elided extract -- the TARGET_64BIT/TARGET_PORTABLE_RUNTIME
   branch structure around the save/restore pairs is partly missing here;
   the pairing of std/ldd (64-bit) vs stw/ldw (32-bit) slots is visible but
   the enclosing conditionals are not. */
6247 output_lbranch (rtx dest, rtx insn, int xdelay)
6251 xoperands[0] = dest;
6253 /* First, free up the delay slot. */
6254 if (xdelay && dbr_sequence_length () != 0)
6256 /* We can't handle a jump in the delay slot. */
6257 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6259 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6262 /* Now delete the delay insn. */
6263 SET_INSN_DELETED (NEXT_INSN (insn));
6266 /* Output an insn to save %r1. The runtime documentation doesn't
6267 specify whether the "Clean Up" slot in the callers frame can
6268 be clobbered by the callee. It isn't copied by HP's builtin
6269 alloca, so this suggests that it can be clobbered if necessary.
6270 The "Static Link" location is copied by HP builtin alloca, so
6271 we avoid using it. Using the cleanup slot might be a problem
6272 if we have to interoperate with languages that pass cleanup
6273 information. However, it should be possible to handle these
6274 situations with GCC's asm feature.
6276 The "Current RP" slot is reserved for the called procedure, so
6277 we try to use it when we don't have a frame of our own. It's
6278 rather unlikely that we won't have a frame when we need to emit
6281 Really the way to go long term is a register scavenger; goto
6282 the target of the jump and find a register which we can use
6283 as a scratch to hold the value in %r1. Then, we wouldn't have
6284 to free up the delay slot or clobber a slot that may be needed
6285 for other purposes. */
6288 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6289 /* Use the return pointer slot in the frame marker. */
6290 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6292 /* Use the slot at -40 in the frame marker since HP builtin
6293 alloca doesn't copy it. */
6294 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6298 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6299 /* Use the return pointer slot in the frame marker. */
6300 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6302 /* Use the "Clean Up" slot in the frame marker. In GCC,
6303 the only other use of this location is for copying a
6304 floating point double argument from a floating-point
6305 register to two general registers. The copy is done
6306 as an "atomic" operation when outputting a call, so it
6307 won't interfere with our using the location here. */
6308 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6311 if (TARGET_PORTABLE_RUNTIME)
/* Portable runtime: materialize DEST with ldil/ldo and branch via %r1. */
6313 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6314 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6315 output_asm_insn ("bv %%r0(%%r1)", xoperands);
/* PIC: compute DEST pc-relative into %r1 (label-difference for SOM/non-GAS,
   $PIC_pcrel$0 for GAS), then branch via %r1. */
6319 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6320 if (TARGET_SOM || !TARGET_GAS)
6322 xoperands[1] = gen_label_rtx ();
6323 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6324 targetm.asm_out.internal_label (asm_out_file, "L",
6325 CODE_LABEL_NUMBER (xoperands[1]));
6326 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6330 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6331 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6333 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6336 /* Now output a very long branch to the original target. */
6337 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6339 /* Now restore the value of %r1 in the delay slot. */
6342 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6343 return "ldd -16(%%r30),%%r1";
6345 return "ldd -40(%%r30),%%r1";
6349 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6350 return "ldw -20(%%r30),%%r1";
6352 return "ldw -12(%%r30),%%r1";
6356 /* This routine handles all the branch-on-bit conditional branch sequences we
6357 might need to generate. It handles nullification of delay slots,
6358 varying length branches, negated branches and all combinations of the
6359 above. it returns the appropriate output template to emit the branch. */
/* NOTE(review): elided extract -- local declarations (`useskip', `xdelay'),
   several return statements and braces are missing from this view.
   Structure parallels output_cbranch/output_bvb. */
6362 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6364 static char buf[100];
6366 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6367 int length = get_attr_length (insn);
6370 /* A conditional branch to the following instruction (e.g. the delay slot) is
6371 asking for a disaster. I do not think this can happen as this pattern
6372 is only used when optimizing; jump optimization should eliminate the
6373 jump. But be prepared just in case. */
6375 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6378 /* If this is a long branch with its delay slot unfilled, set `nullify'
6379 as it can nullify the delay slot and save a nop. */
6380 if (length == 8 && dbr_sequence_length () == 0)
6383 /* If this is a short forward conditional branch which did not get
6384 its delay slot filled, the delay slot can still be nullified. */
6385 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6386 nullify = forward_branch_p (insn);
6388 /* A forward branch over a single nullified insn can be done with a
6389 extrs instruction. This avoids a single cycle penalty due to
6390 mis-predicted branch if we fall through (branch not taken). */
6393 && next_real_insn (insn) != 0
6394 && get_attr_length (next_real_insn (insn)) == 4
6395 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6402 /* All short conditional branches except backwards with an unfilled
6406 strcpy (buf, "{extrs,|extrw,s,}");
6408 strcpy (buf, "bb,");
/* DImode bit tests need the 64-bit forms (extrd / bb,*). */
6409 if (useskip && GET_MODE (operands[0]) == DImode)
6410 strcpy (buf, "extrd,s,*");
6411 else if (GET_MODE (operands[0]) == DImode)
6412 strcpy (buf, "bb,*");
/* WHICH selects which of the two branch operands is the taken target;
   combined with NEGATED it picks the sense of the bit test. */
6413 if ((which == 0 && negated)
6414 || (which == 1 && ! negated))
6419 strcat (buf, " %0,%1,1,%%r0");
6420 else if (nullify && negated)
6421 strcat (buf, ",n %0,%1,%3");
6422 else if (nullify && ! negated)
6423 strcat (buf, ",n %0,%1,%2");
6424 else if (! nullify && negated)
6425 strcat (buf, "%0,%1,%3");
6426 else if (! nullify && ! negated)
6427 strcat (buf, " %0,%1,%2");
6430 /* All long conditionals. Note a short backward branch with an
6431 unfilled delay slot is treated just like a long backward branch
6432 with an unfilled delay slot. */
6434 /* Handle weird backwards branch with a filled delay slot
6435 which is nullified. */
6436 if (dbr_sequence_length () != 0
6437 && ! forward_branch_p (insn)
6440 strcpy (buf, "bb,");
6441 if (GET_MODE (operands[0]) == DImode)
6443 if ((which == 0 && negated)
6444 || (which == 1 && ! negated))
6449 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6451 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6453 /* Handle short backwards branch with an unfilled delay slot.
6454 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6455 taken and untaken branches. */
6456 else if (dbr_sequence_length () == 0
6457 && ! forward_branch_p (insn)
6458 && INSN_ADDRESSES_SET_P ()
6459 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6460 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6462 strcpy (buf, "bb,");
6463 if (GET_MODE (operands[0]) == DImode)
6465 if ((which == 0 && negated)
6466 || (which == 1 && ! negated))
6471 strcat (buf, " %0,%1,%3%#");
6473 strcat (buf, " %0,%1,%2%#");
6477 if (GET_MODE (operands[0]) == DImode)
6478 strcpy (buf, "extrd,s,*");
6480 strcpy (buf, "{extrs,|extrw,s,}");
6481 if ((which == 0 && negated)
6482 || (which == 1 && ! negated))
6486 if (nullify && negated)
6487 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6488 else if (nullify && ! negated)
6489 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6491 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6493 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6498 /* The reversed conditional branch must branch over one additional
6499 instruction if the delay slot is filled and needs to be extracted
6500 by output_lbranch. If the delay slot is empty or this is a
6501 nullified forward branch, the instruction after the reversed
6502 condition branch must be nullified. */
6503 if (dbr_sequence_length () == 0
6504 || (nullify && forward_branch_p (insn)))
6508 operands[4] = GEN_INT (length);
6513 operands[4] = GEN_INT (length + 4);
6516 if (GET_MODE (operands[0]) == DImode)
6517 strcpy (buf, "bb,*");
6519 strcpy (buf, "bb,");
6520 if ((which == 0 && negated)
6521 || (which == 1 && !negated))
6526 strcat (buf, ",n %0,%1,.+%4");
6528 strcat (buf, " %0,%1,.+%4");
/* Emit the skip-around bit test, then the long branch to the real target. */
6529 output_asm_insn (buf, operands);
6530 return output_lbranch (negated ? operands[3] : operands[2],
6536 /* This routine handles all the branch-on-variable-bit conditional branch
6537 sequences we might need to generate. It handles nullification of delay
6538 slots, varying length branches, negated branches and all combinations
6539 of the above. it returns the appropriate output template to emit the
/* NOTE(review): elided extract -- local declarations and several return
   statements/braces are missing from this view.  The bit position comes
   from %sar (the shift-amount register), in contrast to output_bb's fixed
   bit position. */
6543 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6545 static char buf[100];
6547 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6548 int length = get_attr_length (insn);
6551 /* A conditional branch to the following instruction (e.g. the delay slot) is
6552 asking for a disaster. I do not think this can happen as this pattern
6553 is only used when optimizing; jump optimization should eliminate the
6554 jump. But be prepared just in case. */
6556 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6559 /* If this is a long branch with its delay slot unfilled, set `nullify'
6560 as it can nullify the delay slot and save a nop. */
6561 if (length == 8 && dbr_sequence_length () == 0)
6564 /* If this is a short forward conditional branch which did not get
6565 its delay slot filled, the delay slot can still be nullified. */
6566 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6567 nullify = forward_branch_p (insn);
6569 /* A forward branch over a single nullified insn can be done with a
6570 extrs instruction. This avoids a single cycle penalty due to
6571 mis-predicted branch if we fall through (branch not taken). */
6574 && next_real_insn (insn) != 0
6575 && get_attr_length (next_real_insn (insn)) == 4
6576 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6583 /* All short conditional branches except backwards with an unfilled
6587 strcpy (buf, "{vextrs,|extrw,s,}");
6589 strcpy (buf, "{bvb,|bb,}");
6590 if (useskip && GET_MODE (operands[0]) == DImode)
6591 strcpy (buf, "extrd,s,*");
6592 else if (GET_MODE (operands[0]) == DImode)
6593 strcpy (buf, "bb,*");
6594 if ((which == 0 && negated)
6595 || (which == 1 && ! negated))
6600 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6601 else if (nullify && negated)
6602 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6603 else if (nullify && ! negated)
6604 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6605 else if (! nullify && negated)
6606 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6607 else if (! nullify && ! negated)
6608 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6611 /* All long conditionals. Note a short backward branch with an
6612 unfilled delay slot is treated just like a long backward branch
6613 with an unfilled delay slot. */
6615 /* Handle weird backwards branch with a filled delay slot
6616 which is nullified. */
6617 if (dbr_sequence_length () != 0
6618 && ! forward_branch_p (insn)
6621 strcpy (buf, "{bvb,|bb,}");
6622 if (GET_MODE (operands[0]) == DImode)
6624 if ((which == 0 && negated)
6625 || (which == 1 && ! negated))
6630 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6632 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6634 /* Handle short backwards branch with an unfilled delay slot.
6635 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6636 taken and untaken branches. */
6637 else if (dbr_sequence_length () == 0
6638 && ! forward_branch_p (insn)
6639 && INSN_ADDRESSES_SET_P ()
6640 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6641 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6643 strcpy (buf, "{bvb,|bb,}");
6644 if (GET_MODE (operands[0]) == DImode)
6646 if ((which == 0 && negated)
6647 || (which == 1 && ! negated))
6652 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6654 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6658 strcpy (buf, "{vextrs,|extrw,s,}");
6659 if (GET_MODE (operands[0]) == DImode)
6660 strcpy (buf, "extrd,s,*");
6661 if ((which == 0 && negated)
6662 || (which == 1 && ! negated))
6666 if (nullify && negated)
6667 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6668 else if (nullify && ! negated)
6669 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6671 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6673 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6678 /* The reversed conditional branch must branch over one additional
6679 instruction if the delay slot is filled and needs to be extracted
6680 by output_lbranch. If the delay slot is empty or this is a
6681 nullified forward branch, the instruction after the reversed
6682 condition branch must be nullified. */
6683 if (dbr_sequence_length () == 0
6684 || (nullify && forward_branch_p (insn)))
6688 operands[4] = GEN_INT (length);
6693 operands[4] = GEN_INT (length + 4);
6696 if (GET_MODE (operands[0]) == DImode)
6697 strcpy (buf, "bb,*");
6699 strcpy (buf, "{bvb,|bb,}");
6700 if ((which == 0 && negated)
6701 || (which == 1 && !negated))
6706 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6708 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
/* Emit the skip-around test, then the long branch to the real target. */
6709 output_asm_insn (buf, operands);
6710 return output_lbranch (negated ? operands[3] : operands[2],
6716 /* Return the output template for emitting a dbra type insn.
6718 Note it may perform some output operations on its own before
6719 returning the final output string. */
/* NOTE(review): elided extract -- braces, some length comparisons and
   local declarations are missing from this view.  WHICH_ALTERNATIVE
   selects the counter location: 0 = general register, 1 = FP register
   (reloaded via the -16(%r30) frame slot), otherwise memory. */
6721 output_dbra (rtx *operands, rtx insn, int which_alternative)
6723 int length = get_attr_length (insn);
6725 /* A conditional branch to the following instruction (e.g. the delay slot) is
6726 asking for a disaster. Be prepared! */
6728 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6730 if (which_alternative == 0)
6731 return "ldo %1(%0),%0";
6732 else if (which_alternative == 1)
6734 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6735 output_asm_insn ("ldw -16(%%r30),%4", operands);
6736 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6737 return "{fldws|fldw} -16(%%r30),%0";
6741 output_asm_insn ("ldw %0,%4", operands);
6742 return "ldo %1(%4),%4\n\tstw %4,%0";
6746 if (which_alternative == 0)
6748 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6751 /* If this is a long branch with its delay slot unfilled, set `nullify'
6752 as it can nullify the delay slot and save a nop. */
6753 if (length == 8 && dbr_sequence_length () == 0)
6756 /* If this is a short forward conditional branch which did not get
6757 its delay slot filled, the delay slot can still be nullified. */
6758 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6759 nullify = forward_branch_p (insn);
6765 return "addib,%C2,n %1,%0,%3";
6767 return "addib,%C2 %1,%0,%3";
6770 /* Handle weird backwards branch with a fulled delay slot
6771 which is nullified. */
6772 if (dbr_sequence_length () != 0
6773 && ! forward_branch_p (insn)
6775 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6776 /* Handle short backwards branch with an unfilled delay slot.
6777 Using a addb;nop rather than addi;bl saves 1 cycle for both
6778 taken and untaken branches. */
6779 else if (dbr_sequence_length () == 0
6780 && ! forward_branch_p (insn)
6781 && INSN_ADDRESSES_SET_P ()
6782 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6783 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6784 return "addib,%C2 %1,%0,%3%#";
6786 /* Handle normal cases. */
6788 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6790 return "addi,%N2 %1,%0,%0\n\tb %3";
6793 /* The reversed conditional branch must branch over one additional
6794 instruction if the delay slot is filled and needs to be extracted
6795 by output_lbranch. If the delay slot is empty or this is a
6796 nullified forward branch, the instruction after the reversed
6797 condition branch must be nullified. */
6798 if (dbr_sequence_length () == 0
6799 || (nullify && forward_branch_p (insn)))
6803 operands[4] = GEN_INT (length);
6808 operands[4] = GEN_INT (length + 4);
6812 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6814 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6816 return output_lbranch (operands[3], insn, xdelay);
6820 /* Deal with gross reload from FP register case. */
6821 else if (which_alternative == 1)
6823 /* Move loop counter from FP register to MEM then into a GR,
6824 increment the GR, store the GR into MEM, and finally reload
6825 the FP register from MEM from within the branch's delay slot. */
6826 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6828 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6830 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6831 else if (length == 28)
6832 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6835 operands[5] = GEN_INT (length - 16);
6836 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6837 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6838 return output_lbranch (operands[3], insn, 0);
6841 /* Deal with gross reload from memory case. */
6844 /* Reload loop counter from memory, the store back to memory
6845 happens in the branch's delay slot. */
6846 output_asm_insn ("ldw %0,%4", operands);
6848 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6849 else if (length == 16)
6850 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6853 operands[5] = GEN_INT (length - 4);
6854 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6855 return output_lbranch (operands[3], insn, 0);
6860 /* Return the output template for emitting a movb type insn.
6862 Note it may perform some output operations on its own before
6863 returning the final output string. */
/* NOTE(review): elided extract -- braces, some length checks and local
   declarations are missing from this view.  WHICH_ALTERNATIVE selects
   the destination: 0 = general register, 1 = FP register (via the
   -16(%r30) frame slot), 2 = memory, otherwise %sar. */
6865 output_movb (rtx *operands, rtx insn, int which_alternative,
6866 int reverse_comparison)
6868 int length = get_attr_length (insn);
6870 /* A conditional branch to the following instruction (e.g. the delay slot) is
6871 asking for a disaster. Be prepared! */
6873 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6875 if (which_alternative == 0)
6876 return "copy %1,%0";
6877 else if (which_alternative == 1)
6879 output_asm_insn ("stw %1,-16(%%r30)", operands);
6880 return "{fldws|fldw} -16(%%r30),%0";
6882 else if (which_alternative == 2)
6888 /* Support the second variant. */
6889 if (reverse_comparison)
6890 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6892 if (which_alternative == 0)
6894 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6897 /* If this is a long branch with its delay slot unfilled, set `nullify'
6898 as it can nullify the delay slot and save a nop. */
6899 if (length == 8 && dbr_sequence_length () == 0)
6902 /* If this is a short forward conditional branch which did not get
6903 its delay slot filled, the delay slot can still be nullified. */
6904 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6905 nullify = forward_branch_p (insn);
6911 return "movb,%C2,n %1,%0,%3";
6913 return "movb,%C2 %1,%0,%3";
6916 /* Handle weird backwards branch with a filled delay slot
6917 which is nullified. */
6918 if (dbr_sequence_length () != 0
6919 && ! forward_branch_p (insn)
6921 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6923 /* Handle short backwards branch with an unfilled delay slot.
6924 Using a movb;nop rather than or;bl saves 1 cycle for both
6925 taken and untaken branches. */
6926 else if (dbr_sequence_length () == 0
6927 && ! forward_branch_p (insn)
6928 && INSN_ADDRESSES_SET_P ()
6929 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6930 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6931 return "movb,%C2 %1,%0,%3%#";
6932 /* Handle normal cases. */
6934 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6936 return "or,%N2 %1,%%r0,%0\n\tb %3";
6939 /* The reversed conditional branch must branch over one additional
6940 instruction if the delay slot is filled and needs to be extracted
6941 by output_lbranch. If the delay slot is empty or this is a
6942 nullified forward branch, the instruction after the reversed
6943 condition branch must be nullified. */
6944 if (dbr_sequence_length () == 0
6945 || (nullify && forward_branch_p (insn)))
6949 operands[4] = GEN_INT (length);
6954 operands[4] = GEN_INT (length + 4);
6958 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
6960 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
6962 return output_lbranch (operands[3], insn, xdelay);
6965 /* Deal with gross reload for FP destination register case. */
6966 else if (which_alternative == 1)
6968 /* Move source register to MEM, perform the branch test, then
6969 finally load the FP register from MEM from within the branch's
6971 output_asm_insn ("stw %1,-16(%%r30)", operands);
6973 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6974 else if (length == 16)
6975 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6978 operands[4] = GEN_INT (length - 4);
6979 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
6980 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6981 return output_lbranch (operands[3], insn, 0);
6984 /* Deal with gross reload from memory case. */
6985 else if (which_alternative == 2)
6987 /* Reload loop counter from memory, the store back to memory
6988 happens in the branch's delay slot. */
6990 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6991 else if (length == 12)
6992 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6995 operands[4] = GEN_INT (length);
6996 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
6998 return output_lbranch (operands[3], insn, 0);
7001 /* Handle SAR as a destination. */
7005 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7006 else if (length == 12)
7007 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7010 operands[4] = GEN_INT (length);
7011 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7013 return output_lbranch (operands[3], insn, 0);
7018 /* Copy any FP arguments in INSN into integer registers. */
/* NOTE(review): elided extract -- the function head, local declarations
   and braces are partly missing from this view.  Walks the call's
   FUNCTION_USAGE list and, for each FP argument register (32..39),
   bounces the value through -16(%sr0,%r30) into the matching integer
   argument register(s). */
7020 copy_fp_args (rtx insn)
7025 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7027 int arg_mode, regno;
7028 rtx use = XEXP (link, 0);
/* Skip anything that is not a USE of an argument register. */
7030 if (! (GET_CODE (use) == USE
7031 && GET_CODE (XEXP (use, 0)) == REG
7032 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7035 arg_mode = GET_MODE (XEXP (use, 0));
7036 regno = REGNO (XEXP (use, 0));
7038 /* Is it a floating point register? */
7039 if (regno >= 32 && regno <= 39)
7041 /* Copy the FP register into an integer register via memory. */
7042 if (arg_mode == SFmode)
7044 xoperands[0] = XEXP (use, 0);
7045 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7046 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7047 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
/* DFmode: store the double, then reload it as two word loads into
   an even/odd integer register pair. */
7051 xoperands[0] = XEXP (use, 0);
7052 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7053 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7054 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7055 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7061 /* Compute length of the FP argument copy sequence for INSN. */
/* NOTE(review): elided extract -- the length accumulator, the per-mode
   increments and the return statement are not visible here.  The scan
   mirrors copy_fp_args above, so the length must account for the same
   store/load sequences emitted there. */
7063 length_fp_args (rtx insn)
7068 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7070 int arg_mode, regno;
7071 rtx use = XEXP (link, 0);
7073 if (! (GET_CODE (use) == USE
7074 && GET_CODE (XEXP (use, 0)) == REG
7075 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7078 arg_mode = GET_MODE (XEXP (use, 0));
7079 regno = REGNO (XEXP (use, 0));
7081 /* Is it a floating point register? */
7082 if (regno >= 32 && regno <= 39)
7084 if (arg_mode == SFmode)
7094 /* Return the attribute length for the millicode call instruction INSN.
7095 The length must match the code generated by output_millicode_call.
7096 We include the delay slot in the returned length as it is better to
7097 over estimate the length than to under estimate it. */
/* NOTE(review): elided extract -- the returned byte counts for each case
   are not visible here; the distance thresholds (7600000 for 64-bit,
   240000 otherwise) gate the short-call forms. */
7100 attr_length_millicode_call (rtx insn)
7102 unsigned long distance = -1;
7103 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7105 if (INSN_ADDRESSES_SET_P ())
7107 distance = (total + insn_current_reference_address (insn));
/* Unsigned wraparound check: if adding overflowed, treat as "far". */
7108 if (distance < total)
7114 if (!TARGET_LONG_CALLS && distance < 7600000)
7119 else if (TARGET_PORTABLE_RUNTIME)
7123 if (!TARGET_LONG_CALLS && distance < 240000)
7126 if (TARGET_LONG_ABS_CALL && !flag_pic)
7133 /* INSN is a function call. It may have an unconditional jump
7136 CALL_DEST is the routine we are calling. */
7139 output_millicode_call (rtx insn, rtx call_dest)
7141 int attr_length = get_attr_length (insn);
7142 int seq_length = dbr_sequence_length ();
7147 xoperands[0] = call_dest;
7148 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7150 /* Handle the common case where we are sure that the branch will
7151 reach the beginning of the $CODE$ subspace. The within reach
7152 form of the $$sh_func_adrs call has a length of 28. Because
7153 it has an attribute type of multi, it never has a nonzero
7154 sequence length. The length of the $$sh_func_adrs is the same
7155 as certain out of reach PIC calls to other routines. */
7156 if (!TARGET_LONG_CALLS
7157 && ((seq_length == 0
7158 && (attr_length == 12
7159 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7160 || (seq_length != 0 && attr_length == 8)))
7162 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7168 /* It might seem that one insn could be saved by accessing
7169 the millicode function using the linkage table. However,
7170 this doesn't work in shared libraries and other dynamically
7171 loaded objects. Using a pc-relative sequence also avoids
7172 problems related to the implicit use of the gp register. */
7173 output_asm_insn ("b,l .+8,%%r1", xoperands);
7177 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7178 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7182 xoperands[1] = gen_label_rtx ();
7183 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7184 targetm.asm_out.internal_label (asm_out_file, "L",
7185 CODE_LABEL_NUMBER (xoperands[1]));
7186 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7189 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7191 else if (TARGET_PORTABLE_RUNTIME)
7193 /* Pure portable runtime doesn't allow be/ble; we also don't
7194 have PIC support in the assembler/linker, so this sequence
7197 /* Get the address of our target into %r1. */
7198 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7199 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7201 /* Get our return address into %r31. */
7202 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7203 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7205 /* Jump to our target address in %r1. */
7206 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7210 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7212 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7214 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7218 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7219 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7221 if (TARGET_SOM || !TARGET_GAS)
7223 /* The HP assembler can generate relocations for the
7224 difference of two symbols. GAS can do this for a
7225 millicode symbol but not an arbitrary external
7226 symbol when generating SOM output. */
7227 xoperands[1] = gen_label_rtx ();
7228 targetm.asm_out.internal_label (asm_out_file, "L",
7229 CODE_LABEL_NUMBER (xoperands[1]));
7230 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7231 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7235 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7236 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7240 /* Jump to our target address in %r1. */
7241 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7245 if (seq_length == 0)
7246 output_asm_insn ("nop", xoperands);
7248 /* We are done if there isn't a jump in the delay slot. */
7249 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7252 /* This call has an unconditional jump in its delay slot. */
7253 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7255 /* See if the return address can be adjusted. Use the containing
7256 sequence insn's address. */
7257 if (INSN_ADDRESSES_SET_P ())
7259 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7260 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7261 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7263 if (VAL_14_BITS_P (distance))
7265 xoperands[1] = gen_label_rtx ();
7266 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7267 targetm.asm_out.internal_label (asm_out_file, "L",
7268 CODE_LABEL_NUMBER (xoperands[1]));
7271 /* ??? This branch may not reach its target. */
7272 output_asm_insn ("nop\n\tb,n %0", xoperands);
7275 /* ??? This branch may not reach its target. */
7276 output_asm_insn ("nop\n\tb,n %0", xoperands);
7278 /* Delete the jump. */
7279 SET_INSN_DELETED (NEXT_INSN (insn));
7284 /* Return the attribute length of the call instruction INSN. The SIBCALL
7285 flag indicates whether INSN is a regular call or a sibling call. The
7286 length returned must be longer than the code actually generated by
7287 output_call. Since branch shortening is done before delay branch
7288 sequencing, there is no way to determine whether or not the delay
7289 slot will be filled during branch shortening. Even when the delay
7290 slot is filled, we may have to add a nop if the delay slot contains
7291 a branch that can't reach its target. Thus, we always have to include
7292 the delay slot in the length estimate. This used to be done in
7293 pa_adjust_insn_length but we do it here now as some sequences always
7294 fill the delay slot and we can save four bytes in the estimate for
7298 attr_length_call (rtx insn, int sibcall)
7304 rtx pat = PATTERN (insn);
/* (unsigned long) -1 == ULONG_MAX is the "distance unknown" sentinel;
   it makes every "distance < N" reach test below fail, so we fall
   through to the longer call sequences.  */
7305 unsigned long distance = -1;
7307 if (INSN_ADDRESSES_SET_P ())
7309 unsigned long total;
/* Calls out of a named section may need to reach anywhere in the
   module's text, so the worst-case distance includes total_code_bytes.  */
7311 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7312 distance = (total + insn_current_reference_address (insn));
7313 if (distance < total)
7317 /* Determine if this is a local call. */
7318 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7319 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7321 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7323 call_decl = SYMBOL_REF_DECL (call_dest);
/* A locally-binding target never goes through an import stub, which
   enables the shorter sequences selected below.  */
7324 local_call = call_decl && targetm.binds_local_p (call_decl);
7326 /* pc-relative branch. */
7327 if (!TARGET_LONG_CALLS
7328 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7329 || distance < 240000))
7332 /* 64-bit plabel sequence. */
7333 else if (TARGET_64BIT && !local_call)
7334 length += sibcall ? 28 : 24;
7336 /* non-pic long absolute branch sequence. */
7337 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7340 /* long pc-relative branch sequence. */
7341 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7342 || (TARGET_64BIT && !TARGET_GAS)
7343 || (TARGET_GAS && !TARGET_SOM
7344 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7348 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7352 /* 32-bit plabel sequence. */
/* The plabel sequence is indirect, so FP args must travel through the
   general registers; account for those copy insns too.  */
7358 length += length_fp_args (insn);
7368 if (!TARGET_NO_SPACE_REGS)
7376 /* INSN is a function call. It may have an unconditional jump
7379 CALL_DEST is the routine we are calling. */
7382 output_call (rtx insn, rtx call_dest, int sibcall)
7384 int delay_insn_deleted = 0;
7385 int delay_slot_filled = 0;
7386 int seq_length = dbr_sequence_length ();
7387 tree call_decl = SYMBOL_REF_DECL (call_dest);
/* Local binding allows short/pc-relative sequences that skip the
   import-stub machinery; mirrors the test in attr_length_call.  */
7388 int local_call = call_decl && targetm.binds_local_p (call_decl);
7391 xoperands[0] = call_dest;
7393 /* Handle the common case where we're sure that the branch will reach
7394 the beginning of the "$CODE$" subspace. This is the beginning of
7395 the current function if we are in a named section. */
7396 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
/* Sibcalls link through %r0 (i.e. discard the return address);
   regular calls link through %r2.  */
7398 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7399 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7403 if (TARGET_64BIT && !local_call)
7405 /* ??? As far as I can tell, the HP linker doesn't support the
7406 long pc-relative sequence described in the 64-bit runtime
7407 architecture. So, we use a slightly longer indirect call. */
7408 xoperands[0] = get_deferred_plabel (call_dest);
7409 xoperands[1] = gen_label_rtx ();
7411 /* If this isn't a sibcall, we put the load of %r27 into the
7412 delay slot. We can't do this in a sibcall as we don't
7413 have a second call-clobbered scratch register available. */
7415 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
/* Emit the delay-slot insn inline now, ahead of the call sequence.  */
7418 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7421 /* Now delete the delay insn. */
7422 SET_INSN_DELETED (NEXT_INSN (insn));
7423 delay_insn_deleted = 1;
/* Load the function descriptor (plabel): LTP-relative address,
   then the descriptor word itself.  */
7426 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7427 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7428 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
/* Descriptor layout: entry point at offset 16, new GP at offset 24.  */
7432 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7433 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7434 output_asm_insn ("bve (%%r1)", xoperands);
7438 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7439 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7440 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7441 delay_slot_filled = 1;
7446 int indirect_call = 0;
7448 /* Emit a long call. There are several different sequences
7449 of increasing length and complexity. In most cases,
7450 they don't allow an instruction in the delay slot. */
7451 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7452 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7453 && !(TARGET_GAS && !TARGET_SOM
7454 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7459 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7461 && (!TARGET_PA_20 || indirect_call))
7463 /* A non-jump insn in the delay slot. By definition we can
7464 emit this insn before the call (and in fact before argument
7466 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7469 /* Now delete the delay insn. */
7470 SET_INSN_DELETED (NEXT_INSN (insn));
7471 delay_insn_deleted = 1;
7474 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7476 /* This is the best sequence for making long calls in
7477 non-pic code. Unfortunately, GNU ld doesn't provide
7478 the stub needed for external calls, and GAS's support
7479 for this with the SOM linker is buggy. It is safe
7480 to use this for local calls. */
7481 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7483 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7487 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7490 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
/* ble links through %r31; copy it into %r2 where callers expect
   the return pointer.  */
7492 output_asm_insn ("copy %%r31,%%r2", xoperands);
7493 delay_slot_filled = 1;
7498 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7499 || (TARGET_64BIT && !TARGET_GAS))
7501 /* The HP assembler and linker can handle relocations
7502 for the difference of two symbols. GAS and the HP
7503 linker can't do this when one of the symbols is
7505 xoperands[1] = gen_label_rtx ();
7506 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7507 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7508 targetm.asm_out.internal_label (asm_out_file, "L",
7509 CODE_LABEL_NUMBER (xoperands[1]));
7510 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7512 else if (TARGET_GAS && !TARGET_SOM
7513 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7515 /* GAS currently can't generate the relocations that
7516 are needed for the SOM linker under HP-UX using this
7517 sequence. The GNU linker doesn't generate the stubs
7518 that are needed for external calls on TARGET_ELF32
7519 with this sequence. For now, we have to use a
7520 longer plabel sequence when using GAS. */
7521 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7522 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7524 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7529 /* Emit a long plabel-based call sequence. This is
7530 essentially an inline implementation of $$dyncall.
7531 We don't actually try to call $$dyncall as this is
7532 as difficult as calling the function itself. */
7533 xoperands[0] = get_deferred_plabel (call_dest);
7534 xoperands[1] = gen_label_rtx ();
7536 /* Since the call is indirect, FP arguments in registers
7537 need to be copied to the general registers. Then, the
7538 argument relocation stub will copy them back. */
7540 copy_fp_args (insn);
7544 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7545 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7546 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7550 output_asm_insn ("addil LR'%0-$global$,%%r27",
7552 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
/* Bit 30 of the plabel distinguishes a shared-library function
   descriptor from a plain address; if set, dereference it.  */
7556 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7557 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7558 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7559 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7561 if (!sibcall && !TARGET_PA_20)
7563 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7564 if (TARGET_NO_SPACE_REGS)
7565 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7567 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7574 output_asm_insn ("bve (%%r1)", xoperands);
7579 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7580 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7581 delay_slot_filled = 1;
7584 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
/* When space registers are in use, the target's space id must be
   loaded into %sr0 before the inter-space branch.  */
7589 if (!TARGET_NO_SPACE_REGS)
7590 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7595 if (TARGET_NO_SPACE_REGS)
7596 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7598 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7602 if (TARGET_NO_SPACE_REGS)
7603 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7605 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7608 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7610 output_asm_insn ("copy %%r31,%%r2", xoperands);
7611 delay_slot_filled = 1;
/* The delay slot of the branch just emitted was not used; pad it.  */
7618 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7619 output_asm_insn ("nop", xoperands);
7621 /* We are done if there isn't a jump in the delay slot. */
7623 || delay_insn_deleted
7624 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7627 /* A sibcall should never have a branch in the delay slot. */
7628 gcc_assert (!sibcall);
7630 /* This call has an unconditional jump in its delay slot. */
7631 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7633 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7635 /* See if the return address can be adjusted. Use the containing
7636 sequence insn's address. */
7637 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7638 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7639 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7641 if (VAL_14_BITS_P (distance))
/* Adjust %r2 so the call "returns" directly to the jump target,
   avoiding an extra branch.  */
7643 xoperands[1] = gen_label_rtx ();
7644 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7645 targetm.asm_out.internal_label (asm_out_file, "L",
7646 CODE_LABEL_NUMBER (xoperands[1]));
7649 output_asm_insn ("nop\n\tb,n %0", xoperands);
7652 output_asm_insn ("b,n %0", xoperands);
7654 /* Delete the jump. */
7655 SET_INSN_DELETED (NEXT_INSN (insn));
7660 /* Return the attribute length of the indirect call instruction INSN.
7661 The length must match the code generated by output_indirect call.
7662 The returned length includes the delay slot. Currently, the delay
7663 slot of an indirect call sequence is not exposed and it is used by
7664 the sequence itself. */
7667 attr_length_indirect_call (rtx insn)
/* (unsigned long) -1 == ULONG_MAX means "distance unknown"; the reach
   tests below then select the long sequences.  */
7669 unsigned long distance = -1;
7670 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7672 if (INSN_ADDRESSES_SET_P ())
7674 distance = (total + insn_current_reference_address (insn));
7675 if (distance < total)
/* Short sequence: fast indirect calls, or $$dyncall is within branch
   reach (7,600,000 bytes with PA 2.0 b,l; 240,000 with bl).  */
7682 if (TARGET_FAST_INDIRECT_CALLS
7683 || (!TARGET_PORTABLE_RUNTIME
7684 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7685 || distance < 240000)))
7691 if (TARGET_PORTABLE_RUNTIME)
7694 /* Out of reach, can use ble. */
/* Output the assembly for an indirect call INSN through CALL_DEST.
   The sequence chosen must agree byte-for-byte in length with
   attr_length_indirect_call; the dispatch below keys on that length.  */
7699 output_indirect_call (rtx insn, rtx call_dest)
/* 64-bit: CALL_DEST points at a function descriptor; load the entry
   point (offset 16) and new GP (offset 24) and branch.  */
7705 xoperands[0] = call_dest;
7706 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7707 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7711 /* First the special case for kernels, level 0 systems, etc. */
7712 if (TARGET_FAST_INDIRECT_CALLS)
7713 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7715 /* Now the normal case -- we can reach $$dyncall directly or
7716 we're sure that we can get there via a long-branch stub.
7718 No need to check target flags as the length uniquely identifies
7719 the remaining cases. */
7720 if (attr_length_indirect_call (insn) == 8)
7722 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7723 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7724 variant of the B,L instruction can't be used on the SOM target. */
7725 if (TARGET_PA_20 && !TARGET_SOM)
7726 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7728 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7731 /* Long millicode call, but we are not generating PIC or portable runtime
7733 if (attr_length_indirect_call (insn) == 12)
7734 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7736 /* Long millicode call for portable runtime. */
7737 if (attr_length_indirect_call (insn) == 20)
7738 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7740 /* We need a long PIC call to $$dyncall. */
7741 xoperands[0] = NULL_RTX;
/* Materialize the current PC in %r1, then form the pc-relative
   address of $$dyncall from it.  */
7742 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7743 if (TARGET_SOM || !TARGET_GAS)
7745 xoperands[0] = gen_label_rtx ();
7746 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7747 targetm.asm_out.internal_label (asm_out_file, "L",
7748 CODE_LABEL_NUMBER (xoperands[0]));
7749 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7753 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7754 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7757 output_asm_insn ("blr %%r0,%%r2", xoperands);
7758 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7762 /* Return the total length of the save and restore instructions needed for
7763 the data linkage table pointer (i.e., the PIC register) across the call
7764 instruction INSN. No-return calls do not require a save and restore.
7765 In addition, we may be able to avoid the save and restore for calls
7766 within the same translation unit. */
7769 attr_length_save_restore_dltp (rtx insn)
/* A REG_NORETURN note means control never returns, so the DLTP need
   not be preserved across the call.  */
7771 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7777 /* In HPUX 8.0's shared library scheme, special relocations are needed
7778 for function labels if they might be passed to a function
7779 in a shared library (because shared libraries don't live in code
7780 space), and special magic is needed to construct their address. */
/* Rewrite SYM's name in place with the function-label encoding;
   pa_strip_name_encoding is the inverse.  The new name is allocated
   in GC'd storage so it survives the alloca'd scratch buffer.  */
7783 hppa_encode_label (rtx sym)
7785 const char *str = XSTR (sym, 0);
/* +1 for the NUL terminator; the buffer below adds one more byte for
   the encoding prefix character.  */
7786 int len = strlen (str) + 1;
7789 p = newstr = alloca (len + 1);
7793 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
/* Implement TARGET_ENCODE_SECTION_INFO.  On top of the default
   handling, mark text-space symbols and apply the HPUX function-label
   encoding to function symbols (see hppa_encode_label above).  Only
   done on the FIRST call for a decl so the name isn't re-encoded.  */
7797 pa_encode_section_info (tree decl, rtx rtl, int first)
7799 default_encode_section_info (decl, rtl, first);
7801 if (first && TEXT_SPACE_P (decl))
7803 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7804 if (TREE_CODE (decl) == FUNCTION_DECL)
7805 hppa_encode_label (XEXP (rtl, 0));
7809 /* This is sort of inverse to pa_encode_section_info. */
/* Skip the '@' (function label) and '*' (user label prefix) encoding
   characters, if present, and return the undecorated name.  */
7812 pa_strip_name_encoding (const char *str)
7814 str += (*str == '@');
7815 str += (*str == '*');
/* Predicate: nonzero if OP is a SYMBOL_REF carrying the function-label
   encoding applied by hppa_encode_label.  MODE is ignored.  */
7820 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7822 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7825 /* Returns 1 if OP is a function label involved in a simple addition
7826 with a constant. Used to keep certain patterns from matching
7827 during instruction combination. */
7829 is_function_label_plus_const (rtx op)
7831 /* Strip off any CONST. */
7832 if (GET_CODE (op) == CONST)
/* Match (plus (function-label SYMBOL_REF) (const_int ...)).  */
7835 return (GET_CODE (op) == PLUS
7836 && function_label_operand (XEXP (op, 0), Pmode)
7837 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7840 /* Output assembly code for a thunk to FUNCTION. */
/* Implement TARGET_ASM_OUTPUT_MI_THUNK: emit a `this'-adjusting thunk
   that adds DELTA to %r26 (the first argument register) and transfers
   to FUNCTION.  VCALL_OFFSET is unused on this target.  */
7843 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7844 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7847 static unsigned int current_thunk_number;
/* val_14 nonzero when DELTA fits a 14-bit immediate, allowing the
   short `ldo delta(%r26),%r26' adjustment in a delay slot.  */
7848 int val_14 = VAL_14_BITS_P (delta);
7853 xoperands[0] = XEXP (DECL_RTL (function), 0);
7854 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7855 xoperands[2] = GEN_INT (delta);
7857 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7858 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7860 /* Output the thunk. We know that the function is in the same
7861 translation unit (i.e., the same space) as the thunk, and that
7862 thunks are output after their method. Thus, we don't need an
7863 external branch to reach the function. With SOM and GAS,
7864 functions and thunks are effectively in different sections.
7865 Thus, we can always use a IA-relative branch and the linker
7866 will add a long branch stub if necessary.
7868 However, we have to be careful when generating PIC code on the
7869 SOM port to ensure that the sequence does not transfer to an
7870 import stub for the target function as this could clobber the
7871 return value saved at SP-24. This would also apply to the
7872 32-bit linux port if the multi-space model is implemented. */
7873 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7874 && !(flag_pic && TREE_PUBLIC (function))
7875 && (TARGET_GAS || last_address < 262132))
7876 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7877 && ((targetm.have_named_sections
7878 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7879 /* The GNU 64-bit linker has rather poor stub management.
7880 So, we use a long branch from thunks that aren't in
7881 the same section as the target function. */
7883 && (DECL_SECTION_NAME (thunk_fndecl)
7884 != DECL_SECTION_NAME (function)))
7885 || ((DECL_SECTION_NAME (thunk_fndecl)
7886 == DECL_SECTION_NAME (function))
7887 && last_address < 262132)))
7888 || (!targetm.have_named_sections && last_address < 262132))))
7891 output_asm_insn ("addil L'%2,%%r26", xoperands);
7893 output_asm_insn ("b %0", xoperands);
7897 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7902 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7906 else if (TARGET_64BIT)
7908 /* We only have one call-clobbered scratch register, so we can't
7909 make use of the delay slot if delta doesn't fit in 14 bits. */
7912 output_asm_insn ("addil L'%2,%%r26", xoperands);
7913 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
/* Materialize the PC in %r1 and form FUNCTION's address relative
   to it.  */
7916 output_asm_insn ("b,l .+8,%%r1", xoperands);
7920 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7921 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7925 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7926 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7931 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7932 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7937 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7941 else if (TARGET_PORTABLE_RUNTIME)
7943 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7944 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7947 output_asm_insn ("addil L'%2,%%r26", xoperands);
7949 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7953 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7958 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7962 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7964 /* The function is accessible from outside this module. The only
7965 way to avoid an import stub between the thunk and function is to
7966 call the function directly with an indirect sequence similar to
7967 that used by $$dyncall. This is possible because $$dyncall acts
7968 as the import stub in an indirect call. */
7969 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7970 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7971 output_asm_insn ("addil LT'%3,%%r19", xoperands);
7972 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7973 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
/* Bit 30 of the plabel marks a shared-library descriptor; if set,
   mask the low bits and dereference to get GP and entry point.  */
7974 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7975 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7976 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7977 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7981 output_asm_insn ("addil L'%2,%%r26", xoperands);
7987 output_asm_insn ("bve (%%r22)", xoperands);
7990 else if (TARGET_NO_SPACE_REGS)
7992 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
/* Space registers active: load the target's space id into %sr0
   before the external branch.  */
7997 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7998 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7999 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8004 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8006 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8010 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8012 if (TARGET_SOM || !TARGET_GAS)
8014 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8015 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8019 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8020 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8024 output_asm_insn ("addil L'%2,%%r26", xoperands);
8026 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8030 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8035 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8042 output_asm_insn ("addil L'%2,%%r26", xoperands);
8044 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8045 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8049 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8054 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8059 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8061 if (TARGET_SOM && TARGET_GAS)
8063 /* We're done with this subspace except possibly for some additional
8064 debug information. Forget that we are in this subspace to ensure
8065 that the next function is output in its own subspace. */
8067 cfun->machine->in_nsubspa = 2;
8070 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
/* Emit the plabel word referenced via %r19 above into the data
   section.  */
8072 switch_to_section (data_section);
8073 output_asm_insn (".align 4", xoperands);
8074 ASM_OUTPUT_LABEL (file, label);
8075 output_asm_insn (".word P'%0", xoperands);
8078 current_thunk_number++;
/* Round the thunk size up to the function alignment boundary and
   account for it in the running code-size totals.  */
8079 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8080 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8081 last_address += nbytes;
8082 update_total_code_bytes (nbytes);
8085 /* Only direct calls to static functions are allowed to be sibling (tail)
8088 This restriction is necessary because some linker generated stubs will
8089 store return pointers into rp' in some cases which might clobber a
8090 live value already in rp'.
8092 In a sibcall the current function and the target function share stack
8093 space. Thus if the path to the current function and the path to the
8094 target function save a value in rp', they save the value into the
8095 same stack slot, which has undesirable consequences.
8097 Because of the deferred binding nature of shared libraries any function
8098 with external scope could be in a different load module and thus require
8099 rp' to be saved when calling that function. So sibcall optimizations
8100 can only be safe for static functions.
8102 Note that GCC never needs return value relocations, so we don't have to
8103 worry about static calls with return value relocations (which require
8106 It is safe to perform a sibcall optimization when the target function
8107 will never return. */
8109 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8111 if (TARGET_PORTABLE_RUNTIME)
8114 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8115 single subspace mode and the call is not indirect. As far as I know,
8116 there is no operating system support for the multiple subspace mode.
8117 It might be possible to support indirect calls if we didn't use
8118 $$dyncall (see the indirect sequence generated in output_call). */
8120 return (decl != NULL_TREE);
8122 /* Sibcalls are not ok because the arg pointer register is not a fixed
8123 register. This prevents the sibcall optimization from occurring. In
8124 addition, there are problems with stub placement using GNU ld. This
8125 is because a normal sibcall branch uses a 17-bit relocation while
8126 a regular call branch uses a 22-bit relocation. As a result, more
8127 care needs to be taken in the placement of long-branch stubs. */
8131 /* Sibcalls are only ok within a translation unit. */
8132 return (decl && !TREE_PUBLIC (decl));
8135 /* ??? Addition is not commutative on the PA due to the weird implicit
8136 space register selection rules for memory addresses. Therefore, we
8137 don't consider a + b == b + a, as this might be inside a MEM. */
/* Implement TARGET_COMMUTATIVE_P: a PLUS is treated as commutative
   only when space registers are disabled, or when we know we are not
   inside a MEM (OUTER_CODE known and not MEM).  */
8139 pa_commutative_p (rtx x, int outer_code)
8141 return (COMMUTATIVE_P (x)
8142 && (TARGET_NO_SPACE_REGS
8143 || (outer_code != UNKNOWN && outer_code != MEM)
8144 || GET_CODE (x) != PLUS));
8147 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8148 use in fmpyadd instructions. */
/* Operands 0-2 are the multiply (dest, src1, src2); operands 3-5 are
   the add.  Each constraint below mirrors a restriction of the fused
   fmpyadd instruction.  */
8150 fmpyaddoperands (rtx *operands)
8152 enum machine_mode mode = GET_MODE (operands[0]);
8154 /* Must be a floating point mode. */
8155 if (mode != SFmode && mode != DFmode)
8158 /* All modes must be the same. */
8159 if (! (mode == GET_MODE (operands[1])
8160 && mode == GET_MODE (operands[2])
8161 && mode == GET_MODE (operands[3])
8162 && mode == GET_MODE (operands[4])
8163 && mode == GET_MODE (operands[5])))
8166 /* All operands must be registers. */
8167 if (! (GET_CODE (operands[1]) == REG
8168 && GET_CODE (operands[2]) == REG
8169 && GET_CODE (operands[3]) == REG
8170 && GET_CODE (operands[4]) == REG
8171 && GET_CODE (operands[5]) == REG))
8174 /* Only 2 real operands to the addition. One of the input operands must
8175 be the same as the output operand. */
8176 if (! rtx_equal_p (operands[3], operands[4])
8177 && ! rtx_equal_p (operands[3], operands[5]))
8180 /* Inout operand of add cannot conflict with any operands from multiply. */
8181 if (rtx_equal_p (operands[3], operands[0])
8182 || rtx_equal_p (operands[3], operands[1])
8183 || rtx_equal_p (operands[3], operands[2]))
8186 /* multiply cannot feed into addition operands. */
8187 if (rtx_equal_p (operands[4], operands[0])
8188 || rtx_equal_p (operands[5], operands[0]))
8191 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8193 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8194 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8195 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8196 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8197 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8198 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8201 /* Passed. Operands are suitable for fmpyadd. */
8205 #if !defined(USE_COLLECT2)
/* Record SYMBOL as a constructor to run at PRIORITY.  Encode the
   symbol first so the label gets the HPUX function-label treatment,
   then defer to the generic .ctors/named-section/stabs mechanism,
   whichever this configuration supports.  */
8207 pa_asm_out_constructor (rtx symbol, int priority)
8209 if (!function_label_operand (symbol, VOIDmode))
8210 hppa_encode_label (symbol);
8212 #ifdef CTORS_SECTION_ASM_OP
8213 default_ctor_section_asm_out_constructor (symbol, priority);
8215 # ifdef TARGET_ASM_NAMED_SECTION
8216 default_named_section_asm_out_constructor (symbol, priority);
8218 default_stabs_asm_out_constructor (symbol, priority);
/* Record SYMBOL as a destructor to run at PRIORITY.  Mirror image of
   pa_asm_out_constructor above: encode the label, then use the
   .dtors/named-section/stabs mechanism available in this build.  */
8224 pa_asm_out_destructor (rtx symbol, int priority)
8226 if (!function_label_operand (symbol, VOIDmode))
8227 hppa_encode_label (symbol);
8229 #ifdef DTORS_SECTION_ASM_OP
8230 default_dtor_section_asm_out_destructor (symbol, priority);
8232 # ifdef TARGET_ASM_NAMED_SECTION
8233 default_named_section_asm_out_destructor (symbol, priority);
8235 default_stabs_asm_out_destructor (symbol, priority);
8241 /* This function places uninitialized global data in the bss section.
8242 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8243 function on the SOM port to prevent uninitialized global data from
8244 being placed in the data section. */
8247 pa_asm_output_aligned_bss (FILE *stream,
8249 unsigned HOST_WIDE_INT size,
8252 switch_to_section (bss_section);
/* ALIGN is in bits; the .align directive takes bytes.  */
8253 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8255 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8256 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8259 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8260 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8263 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8264 ASM_OUTPUT_LABEL (stream, name);
/* .block reserves SIZE bytes of uninitialized storage after the label.  */
8265 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8268 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8269 that doesn't allow the alignment of global common storage to be directly
8270 specified. The SOM linker aligns common storage based on the rounded
8271 value of the NUM_BYTES parameter in the .comm directive. It's not
8272 possible to use the .align directive as it doesn't affect the alignment
8273 of the label associated with a .comm directive. */
8276 pa_asm_output_aligned_common (FILE *stream,
8278 unsigned HOST_WIDE_INT size,
/* Alignments are in bits throughout this function.  */
8281 unsigned int max_common_align;
/* These ceilings match what the SOM linker's size-based rounding can
   actually deliver; larger requests can't be honored.  */
8283 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8284 if (align > max_common_align)
8286 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8287 "for global common data. Using %u",
8288 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8289 align = max_common_align;
8292 switch_to_section (bss_section);
8294 assemble_name (stream, name);
/* Pad SIZE up to the alignment so the linker's rounding of NUM_BYTES
   yields the requested alignment (see block comment above).  */
8295 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8296 MAX (size, align / BITS_PER_UNIT));
8299 /* We can't use .comm for local common storage as the SOM linker effectively
8300 treats the symbol as universal and uses the same storage for local symbols
8301 with the same name in different object files. The .block directive
8302 reserves an uninitialized block of storage. However, it's not common
8303 storage. Fortunately, GCC never requests common storage with the same
8304 name in any given translation unit. */
8307 pa_asm_output_aligned_local (FILE *stream,
8309 unsigned HOST_WIDE_INT size,
8312 switch_to_section (bss_section);
/* ALIGN is in bits; .align takes bytes.  */
8313 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8316 fprintf (stream, "%s", LOCAL_ASM_OP);
8317 assemble_name (stream, name);
8318 fprintf (stream, "\n");
8321 ASM_OUTPUT_LABEL (stream, name);
8322 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8325 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8326 use in fmpysub instructions. */
/* Operands 0-2 are the multiply (dest, src1, src2); operands 3-5 are
   the subtract.  Companion to fmpyaddoperands above, with the extra
   restriction that subtraction is not commutative.  */
8328 fmpysuboperands (rtx *operands)
8330 enum machine_mode mode = GET_MODE (operands[0]);
8332 /* Must be a floating point mode. */
8333 if (mode != SFmode && mode != DFmode)
8336 /* All modes must be the same. */
8337 if (! (mode == GET_MODE (operands[1])
8338 && mode == GET_MODE (operands[2])
8339 && mode == GET_MODE (operands[3])
8340 && mode == GET_MODE (operands[4])
8341 && mode == GET_MODE (operands[5])))
8344 /* All operands must be registers. */
8345 if (! (GET_CODE (operands[1]) == REG
8346 && GET_CODE (operands[2]) == REG
8347 && GET_CODE (operands[3]) == REG
8348 && GET_CODE (operands[4]) == REG
8349 && GET_CODE (operands[5]) == REG))
8352 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8353 operation, so operands[4] must be the same as operand[3]. */
8354 if (! rtx_equal_p (operands[3], operands[4]))
8357 /* multiply cannot feed into subtraction. */
8358 if (rtx_equal_p (operands[5], operands[0]))
8361 /* Inout operand of sub cannot conflict with any operands from multiply. */
8362 if (rtx_equal_p (operands[3], operands[0])
8363 || rtx_equal_p (operands[3], operands[1])
8364 || rtx_equal_p (operands[3], operands[2]))
8367 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8369 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8370 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8371 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8372 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8373 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8374 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8377 /* Passed. Operands are suitable for fmpysub. */
8381 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8382 constants for shadd instructions. */
/* Return 1 if VAL is a valid shift-and-add multiplier (2, 4 or 8) for
   the shadd instruction, else 0.  NOTE(review): the return statements
   were elided from this chunk; code tokens below are verbatim.  */
8384 shadd_constant_p (int val)
8386 if (val == 2 || val == 4 || val == 8)
8392 /* Return 1 if OP is valid as a base or index register in a
/* Predicate: OP is acceptable as a base or index register of MODE.
   Rejects non-REG rtx, virtual registers (only REG and REG+CONST can be
   instantiated), and — before reload, when the frame pointer is being
   eliminated — the frame pointer itself.  NOTE(review): some lines
   (returns/braces) were elided from this chunk; tokens are verbatim.  */
8396 borx_reg_operand (rtx op, enum machine_mode mode)
8398 if (GET_CODE (op) != REG)
8401 /* We must reject virtual registers as the only expressions that
8402 can be instantiated are REG and REG+CONST. */
8403 if (op == virtual_incoming_args_rtx
8404 || op == virtual_stack_vars_rtx
8405 || op == virtual_stack_dynamic_rtx
8406 || op == virtual_outgoing_args_rtx
8407 || op == virtual_cfa_rtx)
8410 /* While it's always safe to index off the frame pointer, it's not
8411 profitable to do so when the frame pointer is being eliminated. */
8412 if (!reload_completed
8413 && flag_omit_frame_pointer
8414 && !current_function_calls_alloca
8415 && op == frame_pointer_rtx)
/* Defer the final check to the generic register predicate.  */
8418 return register_operand (op, mode);
8421 /* Return 1 if this operand is anything other than a hard register. */
/* Return 1 unless OP is a hard (physical) register; pseudos and
   non-REG rtx qualify.  MODE is ignored.  */
8424 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8426 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8429 /* Return 1 if INSN branches forward. Should be using insn_addresses
8430 to avoid walking through all the insns... */
/* Return 1 if jump INSN branches forward, by walking NEXT_INSN until
   its JUMP_LABEL is reached (or the insn stream ends).
   NOTE(review): the walk-loop condition/braces were elided from this
   chunk; code tokens below are verbatim.  */
8432 forward_branch_p (rtx insn)
8434 rtx label = JUMP_LABEL (insn);
8441 insn = NEXT_INSN (insn);
/* Nonzero iff the label was found ahead of the branch.  */
8444 return (insn == label);
8447 /* Return 1 if OP is an equality comparison, else return 0. */
/* Return 1 if OP is an equality (EQ) or inequality (NE) comparison.  */
8449 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8451 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8454 /* Return 1 if INSN is in the delay slot of a call instruction. */
/* Return nonzero if jump INSN sits in the delay slot of a call: i.e. it
   is slot 1 of a SEQUENCE found two insns back.  NOTE(review): some
   lines (returns/braces) were elided from this chunk; tokens verbatim.  */
8456 jump_in_call_delay (rtx insn)
8459 if (GET_CODE (insn) != JUMP_INSN)
8462 if (PREV_INSN (insn)
8463 && PREV_INSN (PREV_INSN (insn))
8464 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8466 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
/* INSN is in a call delay slot iff it is the second element of the
   SEQUENCE wrapping the call.  */
8468 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8469 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8476 /* Output an unconditional move and branch insn. */
/* Emit assembly for a combined unconditional move + branch (movb,tr or
   an equivalent two-insn sequence), choosing by branch length and delay
   slot contents.  NOTE(review): several condition/brace lines were
   elided from this chunk; code tokens below are verbatim.  */
8479 output_parallel_movb (rtx *operands, rtx insn)
8481 int length = get_attr_length (insn);
8483 /* These are the cases in which we win. */
/* Short branch: a single movb with the ,tr (always true) completer.  */
8485 return "mov%I1b,tr %1,%0,%2";
8487 /* None of the following cases win, but they don't lose either. */
8490 if (dbr_sequence_length () == 0)
8492 /* Nothing in the delay slot, fake it by putting the combined
8493 insn (the copy or add) in the delay slot of a bl. */
8494 if (GET_CODE (operands[1]) == CONST_INT)
8495 return "b %2\n\tldi %1,%0";
8497 return "b %2\n\tcopy %1,%0";
8501 /* Something in the delay slot, but we've got a long branch. */
8502 if (GET_CODE (operands[1]) == CONST_INT)
8503 return "ldi %1,%0\n\tb %2";
8505 return "copy %1,%0\n\tb %2";
/* Very long branch: emit the move, then a full long-branch sequence.  */
8509 if (GET_CODE (operands[1]) == CONST_INT)
8510 output_asm_insn ("ldi %1,%0", operands);
8512 output_asm_insn ("copy %1,%0", operands);
8513 return output_lbranch (operands[2], insn, 1);
8516 /* Output an unconditional add and branch insn. */
/* Emit assembly for a combined unconditional add + branch (addb,tr or
   an equivalent two-insn sequence), mirroring output_parallel_movb.
   NOTE(review): some condition/brace lines were elided from this chunk;
   code tokens below are verbatim.  */
8519 output_parallel_addb (rtx *operands, rtx insn)
8521 int length = get_attr_length (insn);
8523 /* To make life easy we want operand0 to be the shared input/output
8524 operand and operand1 to be the readonly operand. */
8525 if (operands[0] == operands[1])
8526 operands[1] = operands[2];
8528 /* These are the cases in which we win. */
/* Short branch: a single addb with the ,tr (always true) completer.  */
8530 return "add%I1b,tr %1,%0,%3";
8532 /* None of the following cases win, but they don't lose either. */
8535 if (dbr_sequence_length () == 0)
8536 /* Nothing in the delay slot, fake it by putting the combined
8537 insn (the copy or add) in the delay slot of a bl. */
8538 return "b %3\n\tadd%I1 %1,%0,%0";
8540 /* Something in the delay slot, but we've got a long branch. */
8541 return "add%I1 %1,%0,%0\n\tb %3";
/* Very long branch: emit the add, then a full long-branch sequence.  */
8544 output_asm_insn ("add%I1 %1,%0,%0", operands);
8545 return output_lbranch (operands[3], insn, 1);
8548 /* Return nonzero if INSN (a jump insn) immediately follows a call
8549 to a named function. This is used to avoid filling the delay slot
8550 of the jump since it can usually be eliminated by modifying RP in
8551 the delay slot of the call. */
/* Return nonzero when INSN immediately follows a (non-dynamic) call or
   a millicode call; used to skip delay-slot filling for such jumps.
   Only meaningful when jumps may sit in delay slots.
   NOTE(review): some lines (returns/braces) were elided from this
   chunk; code tokens below are verbatim.  */
8554 following_call (rtx insn)
8556 if (! TARGET_JUMP_IN_DELAY)
8559 /* Find the previous real insn, skipping NOTEs. */
8560 insn = PREV_INSN (insn);
8561 while (insn && GET_CODE (insn) == NOTE)
8562 insn = PREV_INSN (insn);
8564 /* Check for CALL_INSNs and millicode calls. */
8566 && ((GET_CODE (insn) == CALL_INSN
8567 && get_attr_type (insn) != TYPE_DYNCALL)
8568 || (GET_CODE (insn) == INSN
8569 && GET_CODE (PATTERN (insn)) != SEQUENCE
8570 && GET_CODE (PATTERN (insn)) != USE
8571 && GET_CODE (PATTERN (insn)) != CLOBBER
8572 && get_attr_type (insn) == TYPE_MILLI)))
8578 /* We use this hook to perform a PA specific optimization which is difficult
8579 to do in earlier passes.
8581 We want the delay slots of branches within jump tables to be filled.
8582 None of the compiler passes at the moment even has the notion that a
8583 PA jump table doesn't contain addresses, but instead contains actual
8586 Because we actually jump into the table, the addresses of each entry
8587 must stay constant in relation to the beginning of the table (which
8588 itself must stay constant relative to the instruction to jump into
8589 it). I don't believe we can guarantee earlier passes of the compiler
8590 will adhere to those rules.
8592 So, late in the compilation process we find all the jump tables, and
8593 expand them into real code -- e.g. each entry in the jump table vector
8594 will get an appropriate label followed by a jump to the final target.
8596 Reorg and the final jump pass can then optimize these branches and
8597 fill their delay slots. We end up with smaller, more efficient code.
8599 The jump instructions within the table are special; we must be able
8600 to identify them during assembly output (if the jumps don't get filled
8601 we need to emit a nop rather than nullifying the delay slot)). We
8602 identify jumps in switch tables by using insns with the attribute
8603 type TYPE_BTABLE_BRANCH.
8605 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8606 insns. This serves two purposes, first it prevents jump.c from
8607 noticing that the last N entries in the table jump to the instruction
8608 immediately after the table and deleting the jumps. Second, those
8609 insns mark where we should emit .begin_brtab and .end_brtab directives
8610 when using GAS (allows for better link time optimizations). */
/* Body of the machine-dependent reorg hook (the function signature line
   is missing from this chunk — presumably pa_reorg; TODO confirm).
   Runs the addtr cleanup and pre-PA8000 combining pass, then expands
   each jump table (ADDR_VEC/ADDR_DIFF_VEC) into labeled short jumps
   bracketed by begin_brtab/end_brtab markers, as described in the long
   comment above.  NOTE(review): many lines (braces, else, continue,
   delete of the vector) were elided; code tokens below are verbatim.  */
8617 remove_useless_addtr_insns (1);
8619 if (pa_cpu < PROCESSOR_8000)
8620 pa_combine_instructions ();
8623 /* This is fairly cheap, so always run it if optimizing. */
8624 if (optimize > 0 && !TARGET_BIG_SWITCH)
8626 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8627 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8629 rtx pattern, tmp, location, label;
8630 unsigned int length, i;
8632 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8633 if (GET_CODE (insn) != JUMP_INSN
8634 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8635 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8638 /* Emit marker for the beginning of the branch table. */
8639 emit_insn_before (gen_begin_brtab (), insn);
8641 pattern = PATTERN (insn);
8642 location = PREV_INSN (insn);
/* ADDR_DIFF_VEC stores its entries in operand 1, ADDR_VEC in operand 0.  */
8643 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8645 for (i = 0; i < length; i++)
8647 /* Emit a label before each jump to keep jump.c from
8648 removing this code. */
8649 tmp = gen_label_rtx ();
8650 LABEL_NUSES (tmp) = 1;
8651 emit_label_after (tmp, location);
8652 location = NEXT_INSN (location);
8654 if (GET_CODE (pattern) == ADDR_VEC)
8655 label = XEXP (XVECEXP (pattern, 0, i), 0);
8657 label = XEXP (XVECEXP (pattern, 1, i), 0);
8659 tmp = gen_short_jump (label);
8661 /* Emit the jump itself. */
8662 tmp = emit_jump_insn_after (tmp, location);
8663 JUMP_LABEL (tmp) = label;
8664 LABEL_NUSES (label)++;
8665 location = NEXT_INSN (location);
8667 /* Emit a BARRIER after the jump. */
8668 emit_barrier_after (location);
8669 location = NEXT_INSN (location);
8672 /* Emit marker for the end of the branch table. */
8673 emit_insn_before (gen_end_brtab (), location);
8674 location = NEXT_INSN (location);
8675 emit_barrier_after (location);
8677 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8683 /* Still need brtab marker insns. FIXME: the presence of these
8684 markers disables output of the branch table to readonly memory,
8685 and any alignment directives that might be needed. Possibly,
8686 the begin_brtab insn should be output before the label for the
8687 table. This doesn't matter at the moment since the tables are
8688 always output in the text section. */
8689 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8691 /* Find an ADDR_VEC insn. */
8692 if (GET_CODE (insn) != JUMP_INSN
8693 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8694 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8697 /* Now generate markers for the beginning and end of the
8699 emit_insn_before (gen_begin_brtab (), insn);
8700 emit_insn_after (gen_end_brtab (), insn);
8705 /* The PA has a number of odd instructions which can perform multiple
8706 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8707 it may be profitable to combine two instructions into one instruction
8708 with two outputs. It's not profitable PA2.0 machines because the
8709 two outputs would take two slots in the reorder buffers.
8711 This routine finds instructions which can be combined and combines
8712 them. We only support some of the potential combinations, and we
8713 only try common ways to find suitable instructions.
8715 * addb can add two registers or a register and a small integer
8716 and jump to a nearby (+-8k) location. Normally the jump to the
8717 nearby location is conditional on the result of the add, but by
8718 using the "true" condition we can make the jump unconditional.
8719 Thus addb can perform two independent operations in one insn.
8721 * movb is similar to addb in that it can perform a reg->reg
8722 or small immediate->reg copy and jump to a nearby (+-8k location).
8724 * fmpyadd and fmpysub can perform a FP multiply and either an
8725 FP add or FP sub if the operands of the multiply and add/sub are
8726 independent (there are other minor restrictions). Note both
8727 the fmpy and fadd/fsub can in theory move to better spots according
8728 to data dependencies, but for now we require the fmpy stay at a
8731 * Many of the memory operations can perform pre & post updates
8732 of index registers. GCC's pre/post increment/decrement addressing
8733 is far too simple to take advantage of all the possibilities. This
8734 pass may not be suitable since those insns may not be independent.
8736 * comclr can compare two ints or an int and a register, nullify
8737 the following instruction and zero some other register. This
8738 is more difficult to use as it's harder to find an insn which
8739 will generate a comclr than finding something like an unconditional
8740 branch. (conditional moves & long branches create comclr insns).
8742 * Most arithmetic operations can conditionally skip the next
8743 instruction. They can be viewed as "perform this operation
8744 and conditionally jump to this nearby location" (where nearby
8745 is an insns away). These are difficult to use due to the
8746 branch length restrictions. */
/* Combine pairs of independent insns (fmpy+fadd/fsub, or an
   add/move with an unconditional branch) into single two-output PA
   instructions, per the long comment above.  For each "anchor" insn it
   scans backward, then forward, for a compatible "floating" insn, tests
   the pair with pa_can_combine_p against a scratch PARALLEL, and on
   success emits the combined insn and deletes the originals.
   NOTE(review): many lines (braces, loop bounds, break/continue, the
   optimize>=2 guard, variable declarations) were elided from this
   chunk; code tokens below are verbatim.  */
8749 pa_combine_instructions (void)
8753 /* This can get expensive since the basic algorithm is on the
8754 order of O(n^2) (or worse). Only do it for -O2 or higher
8755 levels of optimization. */
8759 /* Walk down the list of insns looking for "anchor" insns which
8760 may be combined with "floating" insns. As the name implies,
8761 "anchor" instructions don't move, while "floating" insns may
/* Scratch two-element PARALLEL reused by every pa_can_combine_p test.  */
8763 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8764 new = make_insn_raw (new);
8766 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8768 enum attr_pa_combine_type anchor_attr;
8769 enum attr_pa_combine_type floater_attr;
8771 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8772 Also ignore any special USE insns. */
8773 if ((GET_CODE (anchor) != INSN
8774 && GET_CODE (anchor) != JUMP_INSN
8775 && GET_CODE (anchor) != CALL_INSN)
8776 || GET_CODE (PATTERN (anchor)) == USE
8777 || GET_CODE (PATTERN (anchor)) == CLOBBER
8778 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8779 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8782 anchor_attr = get_attr_pa_combine_type (anchor);
8783 /* See if anchor is an insn suitable for combination. */
8784 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8785 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8786 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8787 && ! forward_branch_p (anchor)))
/* Backward scan for a floater candidate.  */
8791 for (floater = PREV_INSN (anchor);
8793 floater = PREV_INSN (floater))
8795 if (GET_CODE (floater) == NOTE
8796 || (GET_CODE (floater) == INSN
8797 && (GET_CODE (PATTERN (floater)) == USE
8798 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8801 /* Anything except a regular INSN will stop our search. */
8802 if (GET_CODE (floater) != INSN
8803 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8804 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8810 /* See if FLOATER is suitable for combination with the
8812 floater_attr = get_attr_pa_combine_type (floater);
8813 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8814 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8815 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8816 && floater_attr == PA_COMBINE_TYPE_FMPY))
8818 /* If ANCHOR and FLOATER can be combined, then we're
8819 done with this pass. */
8820 if (pa_can_combine_p (new, anchor, floater, 0,
8821 SET_DEST (PATTERN (floater)),
8822 XEXP (SET_SRC (PATTERN (floater)), 0),
8823 XEXP (SET_SRC (PATTERN (floater)), 1)))
8827 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8828 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8830 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8832 if (pa_can_combine_p (new, anchor, floater, 0,
8833 SET_DEST (PATTERN (floater)),
8834 XEXP (SET_SRC (PATTERN (floater)), 0),
8835 XEXP (SET_SRC (PATTERN (floater)), 1)))
8840 if (pa_can_combine_p (new, anchor, floater, 0,
8841 SET_DEST (PATTERN (floater)),
8842 SET_SRC (PATTERN (floater)),
8843 SET_SRC (PATTERN (floater))))
8849 /* If we didn't find anything on the backwards scan try forwards. */
8851 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8852 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8854 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8856 if (GET_CODE (floater) == NOTE
8857 || (GET_CODE (floater) == INSN
8858 && (GET_CODE (PATTERN (floater)) == USE
8859 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8863 /* Anything except a regular INSN will stop our search. */
8864 if (GET_CODE (floater) != INSN
8865 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8866 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8872 /* See if FLOATER is suitable for combination with the
8874 floater_attr = get_attr_pa_combine_type (floater);
8875 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8876 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8877 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8878 && floater_attr == PA_COMBINE_TYPE_FMPY))
8880 /* If ANCHOR and FLOATER can be combined, then we're
8881 done with this pass. */
8882 if (pa_can_combine_p (new, anchor, floater, 1,
8883 SET_DEST (PATTERN (floater)),
8884 XEXP (SET_SRC (PATTERN (floater)),
8886 XEXP (SET_SRC (PATTERN (floater)),
8893 /* FLOATER will be nonzero if we found a suitable floating
8894 insn for combination with ANCHOR. */
8896 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8897 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8899 /* Emit the new instruction and delete the old anchor. */
8900 emit_insn_before (gen_rtx_PARALLEL
8902 gen_rtvec (2, PATTERN (anchor),
8903 PATTERN (floater))),
8906 SET_INSN_DELETED (anchor);
8908 /* Emit a special USE insn for FLOATER, then delete
8909 the floating insn. */
8910 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8911 delete_insn (floater);
8916 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8919 /* Emit the new_jump instruction and delete the old anchor. */
8921 = emit_jump_insn_before (gen_rtx_PARALLEL
8923 gen_rtvec (2, PATTERN (anchor),
8924 PATTERN (floater))),
8927 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8928 SET_INSN_DELETED (anchor);
8930 /* Emit a special USE insn for FLOATER, then delete
8931 the floating insn. */
8932 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8933 delete_insn (floater);
/* Test whether ANCHOR and FLOATER can be combined: splice both patterns
   into scratch insn NEW, try to recognize it and satisfy constraints,
   then verify that DEST is not used and SRC1/SRC2 are not set between
   the two insns (exclusive).  REVERSED selects the scan direction the
   caller used (presumably determines which insn is start vs. end of the
   checked range — TODO confirm, those lines are elided).
   NOTE(review): several lines (returns, start/end setup, braces) were
   elided from this chunk; code tokens below are verbatim.  */
8941 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8944 int insn_code_number;
8947 /* Create a PARALLEL with the patterns of ANCHOR and
8948 FLOATER, try to recognize it, then test constraints
8949 for the resulting pattern.
8951 If the pattern doesn't match or the constraints
8952 aren't met keep searching for a suitable floater
8954 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8955 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
/* Force re-recognition of the scratch insn with the new patterns.  */
8956 INSN_CODE (new) = -1;
8957 insn_code_number = recog_memoized (new);
8958 if (insn_code_number < 0
8959 || (extract_insn (new), ! constrain_operands (1)))
8973 /* There's up to three operands to consider. One
8974 output and two inputs.
8976 The output must not be used between FLOATER & ANCHOR
8977 exclusive. The inputs must not be set between
8978 FLOATER and ANCHOR exclusive. */
8980 if (reg_used_between_p (dest, start, end))
8983 if (reg_set_between_p (src1, start, end))
8986 if (reg_set_between_p (src2, start, end))
8989 /* If we get here, then everything is good. */
8993 /* Return nonzero if references for INSN are delayed.
8995 Millicode insns are actually function calls with some special
8996 constraints on arguments and register usage.
8998 Millicode calls always expect their arguments in the integer argument
8999 registers, and always return their result in %r29 (ret1). They
9000 are expected to clobber their arguments, %r1, %r29, and the return
9001 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9003 This function tells reorg that the references to arguments and
9004 millicode calls do not appear to happen until after the millicode call.
9005 This allows reorg to put insns which set the argument registers into the
9006 delay slot of the millicode call -- thus they act more like traditional
9009 Note we cannot consider side effects of the insn to be delayed because
9010 the branch and link insn will clobber the return pointer. If we happened
9011 to use the return pointer in the delay slot of the call, then we lose.
9013 get_attr_type will try to recognize the given insn, so make sure to
9014 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
/* Return nonzero if INSN is a millicode call (see comment above):
   a plain INSN whose pattern is not SEQUENCE/USE/CLOBBER (which
   get_attr_type cannot handle) and whose type attribute is MILLI.  */
9017 insn_refs_are_delayed (rtx insn)
9019 return ((GET_CODE (insn) == INSN
9020 && GET_CODE (PATTERN (insn)) != SEQUENCE
9021 && GET_CODE (PATTERN (insn)) != USE
9022 && GET_CODE (PATTERN (insn)) != CLOBBER
9023 && get_attr_type (insn) == TYPE_MILLI));
9026 /* On the HP-PA the value is found in register(s) 28(-29), unless
9027 the mode is SF or DF. Then the value is returned in fr4 (32).
9029 This must perform the same promotions as PROMOTE_MODE, else
9030 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
9032 Small structures must be returned in a PARALLEL on PA64 in order
9033 to match the HP Compiler ABI. */
/* Return the rtx describing where a value of type VALTYPE is returned:
   r28(-29) for integers/aggregates, fr4 (reg 32) for SF/DF floats; small
   PA64 aggregates get a PARALLEL to match the HP compiler ABI.
   NOTE(review): some guard/brace lines were elided from this chunk;
   code tokens below are verbatim.  */
9036 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
9038 enum machine_mode valmode;
9040 if (AGGREGATE_TYPE_P (valtype)
9041 || TREE_CODE (valtype) == COMPLEX_TYPE
9042 || TREE_CODE (valtype) == VECTOR_TYPE)
9046 /* Aggregates with a size less than or equal to 128 bits are
9047 returned in GR 28(-29). They are left justified. The pad
9048 bits are undefined. Larger aggregates are returned in
/* One DImode piece per word (at most two for <= 128 bits).  */
9052 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9054 for (i = 0; i < ub; i++)
9056 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9057 gen_rtx_REG (DImode, 28 + i),
9062 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9064 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9066 /* Aggregates 5 to 8 bytes in size are returned in general
9067 registers r28-r29 in the same manner as other non
9068 floating-point objects. The data is right-justified and
9069 zero-extended to 64 bits. This is opposite to the normal
9070 justification used on big endian targets and requires
9071 special treatment. */
9072 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9073 gen_rtx_REG (DImode, 28), const0_rtx);
9074 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
/* Promote sub-word integers and pointers to word_mode, matching
   PROMOTE_MODE as required by TARGET_PROMOTE_FUNCTION_RETURN.  */
9078 if ((INTEGRAL_TYPE_P (valtype)
9079 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
9080 || POINTER_TYPE_P (valtype))
9081 valmode = word_mode;
9083 valmode = TYPE_MODE (valtype);
9085 if (TREE_CODE (valtype) == REAL_TYPE
9086 && !AGGREGATE_TYPE_P (valtype)
9087 && TYPE_MODE (valtype) != TFmode
9088 && !TARGET_SOFT_FLOAT)
9089 return gen_rtx_REG (valmode, 32);
9091 return gen_rtx_REG (valmode, 28);
9094 /* Return the location of a parameter that is passed in a register or NULL
9095 if the parameter has any component that is passed in memory.
9097 This is new code and will be pushed to into the net sources after
9100 ??? We might want to restructure this so that it looks more like other
/* Return the register location (REG or PARALLEL) for an argument of
   MODE/TYPE given the CUM state, or (per the comment above) zero when
   the argument is passed in memory.  Handles the quite different 32-bit
   and 64-bit ABIs, including passing unprototyped FP args in both
   general and FP registers.  NOTE(review): many lines (braces, else
   branches, TARGET_64BIT guards, returns) were elided from this chunk;
   code tokens below are verbatim.  */
9103 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9104 int named ATTRIBUTE_UNUSED)
9106 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9113 if (mode == VOIDmode)
9116 arg_size = FUNCTION_ARG_SIZE (mode, type);
9118 /* If this arg would be passed partially or totally on the stack, then
9119 this routine should return zero. pa_arg_partial_bytes will
9120 handle arguments which are split between regs and stack slots if
9121 the ABI mandates split arguments. */
9124 /* The 32-bit ABI does not split arguments. */
9125 if (cum->words + arg_size > max_arg_words)
/* 64-bit path: account for the doubleword alignment slot.  */
9131 alignment = cum->words & 1;
9132 if (cum->words + alignment >= max_arg_words)
9136 /* The 32bit ABIs and the 64bit ABIs are rather different,
9137 particularly in their handling of FP registers. We might
9138 be able to cleverly share code between them, but I'm not
9139 going to bother in the hope that splitting them up results
9140 in code that is more easily understood. */
9144 /* Advance the base registers to their current locations.
9146 Remember, gprs grow towards smaller register numbers while
9147 fprs grow to higher register numbers. Also remember that
9148 although FP regs are 32-bit addressable, we pretend that
9149 the registers are 64-bits wide. */
9150 gpr_reg_base = 26 - cum->words;
9151 fpr_reg_base = 32 + cum->words;
9153 /* Arguments wider than one word and small aggregates need special
9157 || (type && (AGGREGATE_TYPE_P (type)
9158 || TREE_CODE (type) == COMPLEX_TYPE
9159 || TREE_CODE (type) == VECTOR_TYPE)))
9161 /* Double-extended precision (80-bit), quad-precision (128-bit)
9162 and aggregates including complex numbers are aligned on
9163 128-bit boundaries. The first eight 64-bit argument slots
9164 are associated one-to-one, with general registers r26
9165 through r19, and also with floating-point registers fr4
9166 through fr11. Arguments larger than one word are always
9167 passed in general registers.
9169 Using a PARALLEL with a word mode register results in left
9170 justified data on a big-endian target. */
9173 int i, offset = 0, ub = arg_size;
9175 /* Align the base register. */
9176 gpr_reg_base -= alignment;
9178 ub = MIN (ub, max_arg_words - cum->words - alignment);
9179 for (i = 0; i < ub; i++)
9181 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9182 gen_rtx_REG (DImode, gpr_reg_base),
9188 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9193 /* If the argument is larger than a word, then we know precisely
9194 which registers we must use. */
9208 /* Structures 5 to 8 bytes in size are passed in the general
9209 registers in the same manner as other non floating-point
9210 objects. The data is right-justified and zero-extended
9211 to 64 bits. This is opposite to the normal justification
9212 used on big endian targets and requires special treatment.
9213 We now define BLOCK_REG_PADDING to pad these objects.
9214 Aggregates, complex and vector types are passed in the same
9215 manner as structures. */
9217 || (type && (AGGREGATE_TYPE_P (type)
9218 || TREE_CODE (type) == COMPLEX_TYPE
9219 || TREE_CODE (type) == VECTOR_TYPE)))
9221 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9222 gen_rtx_REG (DImode, gpr_reg_base),
9224 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9229 /* We have a single word (32 bits). A simple computation
9230 will get us the register #s we need. */
9231 gpr_reg_base = 26 - cum->words;
9232 fpr_reg_base = 32 + 2 * cum->words;
9236 /* Determine if the argument needs to be passed in both general and
9237 floating point registers. */
9238 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9239 /* If we are doing soft-float with portable runtime, then there
9240 is no need to worry about FP regs. */
9241 && !TARGET_SOFT_FLOAT
9242 /* The parameter must be some kind of scalar float, else we just
9243 pass it in integer registers. */
9244 && GET_MODE_CLASS (mode) == MODE_FLOAT
9245 /* The target function must not have a prototype. */
9246 && cum->nargs_prototype <= 0
9247 /* libcalls do not need to pass items in both FP and general
9249 && type != NULL_TREE
9250 /* All this hair applies to "outgoing" args only. This includes
9251 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9253 /* Also pass outgoing floating arguments in both registers in indirect
9254 calls with the 32 bit ABI and the HP assembler since there is no
9255 way to the specify argument locations in static functions. */
9260 && GET_MODE_CLASS (mode) == MODE_FLOAT))
/* Pass the value in both the FP and the general register.  */
9266 gen_rtx_EXPR_LIST (VOIDmode,
9267 gen_rtx_REG (mode, fpr_reg_base),
9269 gen_rtx_EXPR_LIST (VOIDmode,
9270 gen_rtx_REG (mode, gpr_reg_base),
9275 /* See if we should pass this parameter in a general register. */
9276 if (TARGET_SOFT_FLOAT
9277 /* Indirect calls in the normal 32bit ABI require all arguments
9278 to be passed in general registers. */
9279 || (!TARGET_PORTABLE_RUNTIME
9283 /* If the parameter is not a scalar floating-point parameter,
9284 then it belongs in GPRs. */
9285 || GET_MODE_CLASS (mode) != MODE_FLOAT
9286 /* Structure with single SFmode field belongs in GPR. */
9287 || (type && AGGREGATE_TYPE_P (type)))
9288 retval = gen_rtx_REG (mode, gpr_reg_base);
9290 retval = gen_rtx_REG (mode, fpr_reg_base);
9296 /* If this arg would be passed totally in registers or totally on the stack,
9297 then this routine should return zero. */
/* Return how many bytes of an argument go in registers when it is split
   between registers and stack; 0 when it fits entirely in one or the
   other (see comment above).  NOTE(review): some guard/return lines
   were elided from this chunk; code tokens below are verbatim.  */
9300 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9301 tree type, bool named ATTRIBUTE_UNUSED)
9303 unsigned int max_arg_words = 8;
9304 unsigned int offset = 0;
/* Multi-word args are doubleword-aligned: burn a pad slot if needed.  */
9309 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9312 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9313 /* Arg fits fully into registers. */
9315 else if (cum->words + offset >= max_arg_words)
9316 /* Arg fully on the stack. */
/* Otherwise the arg is split; the register part is what remains.  */
9320 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9324 /* A get_unnamed_section callback for switching to the text section.
9326 This function is only used with SOM. Because we don't support
9327 named subspaces, we can only create a new subspace or switch back
9328 to the default text subspace. */
/* get_unnamed_section callback that emits the SOM directives for
   switching to the text section, creating a fresh $CODE$ subspace once
   per function (comdat for one-only functions); see comment above.
   NOTE(review): some lines (braces, else branches, guards) were elided
   from this chunk; code tokens below are verbatim.  */
9331 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9333 gcc_assert (TARGET_SOM);
9336 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9338 /* We only want to emit a .nsubspa directive once at the
9339 start of the function. */
9340 cfun->machine->in_nsubspa = 1;
9342 /* Create a new subspace for the text. This provides
9343 better stub placement and one-only functions. */
9345 && DECL_ONE_ONLY (cfun->decl)
9346 && !DECL_WEAK (cfun->decl))
9348 output_section_asm_op ("\t.SPACE $TEXT$\n"
9349 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9350 "ACCESS=44,SORT=24,COMDAT");
9356 /* There isn't a current function or the body of the current
9357 function has been completed. So, we are changing to the
9358 text section to output debugging information. Thus, we
9359 need to forget that we are in the text section so that
9360 varasm.c will call us when text_section is selected again. */
9361 gcc_assert (!cfun || !cfun->machine
9362 || cfun->machine->in_nsubspa == 2);
9365 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
/* Default: switch back to the standard $CODE$ subspace.  */
9368 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9371 /* A get_unnamed_section callback for switching to comdat data
9372 sections. This function is only used with SOM. */
/* get_unnamed_section callback for SOM comdat data sections: DATA is
   the section directive string to emit.  */
9375 som_output_comdat_data_section_asm_op (const void *data)
9378 output_section_asm_op (data);
9381 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS for SOM: create the text,
   readonly-data, one-only readonly-data and one-only data sections, and
   redirect readonly data and exception info to work around linker/
   assembler limitations described in the comments below.
   NOTE(review): a few lines (assignment targets, flag tests) were
   elided from this chunk; code tokens below are verbatim.  */
9384 pa_som_asm_init_sections (void)
9387 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9389 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9390 is not being generated. */
9391 som_readonly_data_section
9392 = get_unnamed_section (0, output_section_asm_op,
9393 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9395 /* When secondary definitions are not supported, SOM makes readonly
9396 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9398 som_one_only_readonly_data_section
9399 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9401 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9402 "ACCESS=0x2c,SORT=16,COMDAT");
9405 /* When secondary definitions are not supported, SOM makes data one-only
9406 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9407 som_one_only_data_section
9408 = get_unnamed_section (SECTION_WRITE,
9409 som_output_comdat_data_section_asm_op,
9410 "\t.SPACE $PRIVATE$\n"
9411 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9412 "ACCESS=31,SORT=24,COMDAT");
9414 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9415 which reference data within the $TEXT$ space (for example constant
9416 strings in the $LIT$ subspace).
9418 The assemblers (GAS and HP as) both have problems with handling
9419 the difference of two symbols which is the other correct way to
9420 reference constant data during PIC code generation.
9422 So, there's no way to reference constant data which is in the
9423 $TEXT$ space during PIC generation. Instead place all constant
9424 data into the $PRIVATE$ subspace (this reduces sharing, but it
9425 works correctly). */
9426 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9428 /* We must not have a reference to an external symbol defined in a
9429 shared library in a readonly section, else the SOM linker will
9432 So, we force exception information into the data section. */
9433 exception_section = data_section;
9436 /* On hpux10, the linker will give an error if we have a reference
9437 in the read-only data section to a symbol defined in a shared
9438 library. Therefore, expressions that might require a reloc can
9439 not be placed in the read-only data section. */
/* Choose the output section for EXP given RELOC (nonzero when EXP may
   need runtime relocations); ALIGN is unused.  Presumably the worker
   for TARGET_ASM_SELECT_SECTION -- confirm against the full source.
   NOTE(review): this listing is missing the return-type line, the
   braces, and a few condition lines (orig. 9451-9453 and 9462) --
   most likely the "&& !reloc" test closing the first condition and
   TARGET_SOM guards on the one-only branches.  */
9442 pa_select_section (tree exp, int reloc,
9443 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
/* A read-only, non-volatile variable with a constant (or error)
   initializer is a candidate for a read-only section.  */
9445 if (TREE_CODE (exp) == VAR_DECL
9446 && TREE_READONLY (exp)
9447 && !TREE_THIS_VOLATILE (exp)
9448 && DECL_INITIAL (exp)
9449 && (DECL_INITIAL (exp) == error_mark_node
9450 || TREE_CONSTANT (DECL_INITIAL (exp)))
/* One-only (COMDAT) but not weak read-only data goes to the dedicated
   one-only read-only section.  */
9454 && DECL_ONE_ONLY (exp)
9455 && !DECL_WEAK (exp))
9456 return som_one_only_readonly_data_section;
9458 return readonly_data_section;
/* Constants needing no relocation may also live in read-only data.  */
9460 else if (CONSTANT_CLASS_P (exp) && !reloc)
9461 return readonly_data_section;
/* One-only, non-weak variables that did not qualify as read-only go
   to the one-only writable data section.  */
9463 && TREE_CODE (exp) == VAR_DECL
9464 && DECL_ONE_ONLY (exp)
9465 && !DECL_WEAK (exp))
9466 return som_one_only_data_section;
/* Everything else: ordinary writable data.  */
9468 return data_section;
/* Make the symbol NAME global in the SOM assembly output on STREAM by
   emitting a ".EXPORT name,DATA" directive.  NOTE(review): the
   return-type line and the braces are missing from this listing.  */
9472 pa_globalize_label (FILE *stream, const char *name)
9474 /* We only handle DATA objects here, functions are globalized in
9475 ASM_DECLARE_FUNCTION_NAME. */
9476 if (! FUNCTION_NAME_P (name))
9478 fputs ("\t.EXPORT ", stream);
/* assemble_name writes NAME through the standard symbol-output path
   rather than a raw fputs.  */
9479 assemble_name (stream, name);
9480 fputs (",DATA\n", stream);
9484 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9487 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9488 int incoming ATTRIBUTE_UNUSED)
9490 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9493 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9496 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9498 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9499 PA64 ABI says that objects larger than 128 bits are returned in memory.
9500 Note, int_size_in_bytes can return -1 if the size of the object is
9501 variable or larger than the maximum value that can be expressed as
9502 a HOST_WIDE_INT. It can also return zero for an empty type. The
9503 simplest way to handle variable and empty types is to pass them in
9504 memory. This avoids problems in defining the boundaries of argument
9505 slots, allocating registers, etc. */
9506 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9507 || int_size_in_bytes (type) <= 0);
9510 /* Structure to hold declaration and name of external symbols that are
9511 emitted by GCC. We generate a vector of these symbols and output them
9512 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9513 This avoids putting out names that are never really used. */
/* NOTE(review): the struct's field list (orig. lines 9516-9520) is
   missing from this listing; the uses below (p->decl, p->name in
   pa_hpux_file_end) imply a "tree decl" and a "const char *name"
   member -- confirm against the full source.  GTY(()) marks the type
   for the garbage collector.  */
9515 typedef struct extern_symbol GTY(())
9521 /* Define gc'd vector type for extern_symbol. */
9522 DEF_VEC_O(extern_symbol);
9523 DEF_VEC_ALLOC_O(extern_symbol,gc);
9525 /* Vector of extern_symbol pointers. */
/* GC-managed growable vector of pending external symbols; filled by
   pa_hpux_asm_output_external and drained/freed by pa_hpux_file_end.  */
9526 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9528 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9529 /* Mark DECL (name NAME) as an external reference (assembler output
9530 file FILE). This saves the names to output at the end of the file
9531 if actually referenced. */
/* Pushes a new (zero-initialized) slot onto the extern_symbols vector
   instead of emitting anything now.  NOTE(review): the statements
   that fill in the new slot (presumably p->decl = decl and
   p->name = name) fall on lines missing from this listing, along with
   the return-type line and braces.  */
9534 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9536 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
/* This hook is only ever handed the main assembler stream; the assert
   documents and enforces that invariant.  */
9538 gcc_assert (file == asm_out_file);
9543 /* Output text required at the end of an assembler file.
9544 This includes deferred plabels and .import directives for
9545 all external symbols that were actually referenced. */
9548 pa_hpux_file_end (void)
9553 if (!NO_DEFERRED_PROFILE_COUNTERS)
9554 output_deferred_profile_counters ();
9556 output_deferred_plabels ();
9558 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9560 tree decl = p->decl;
9562 if (!TREE_ASM_WRITTEN (decl)
9563 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9564 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9567 VEC_free (extern_symbol, gc, extern_symbols);