1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
51 /* Return nonzero if there is a bypass for the output of
52 OUT_INSN and the fp store IN_INSN. */
54 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
56 enum machine_mode store_mode;
57 enum machine_mode other_mode;
60 if (recog_memoized (in_insn) < 0
61 || get_attr_type (in_insn) != TYPE_FPSTORE
62 || recog_memoized (out_insn) < 0)
65 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
67 set = single_set (out_insn);
71 other_mode = GET_MODE (SET_SRC (set));
73 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
77 #ifndef DO_FRAME_NOTES
78 #ifdef INCOMING_RETURN_ADDR_RTX
79 #define DO_FRAME_NOTES 1
81 #define DO_FRAME_NOTES 0
85 static void copy_reg_pointer (rtx, rtx);
86 static void fix_range (const char *);
87 static bool pa_handle_option (size_t, const char *, int);
88 static int hppa_address_cost (rtx);
89 static bool hppa_rtx_costs (rtx, int, int, int *);
90 static inline rtx force_mode (enum machine_mode, rtx);
91 static void pa_reorg (void);
92 static void pa_combine_instructions (void);
93 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
94 static int forward_branch_p (rtx);
95 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
96 static int compute_movmem_length (rtx);
97 static int compute_clrmem_length (rtx);
98 static bool pa_assemble_integer (rtx, unsigned int, int);
99 static void remove_useless_addtr_insns (int);
100 static void store_reg (int, HOST_WIDE_INT, int);
101 static void store_reg_modify (int, int, HOST_WIDE_INT);
102 static void load_reg (int, HOST_WIDE_INT, int);
103 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
104 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
105 static void update_total_code_bytes (int);
106 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
107 static int pa_adjust_cost (rtx, rtx, rtx, int);
108 static int pa_adjust_priority (rtx, int);
109 static int pa_issue_rate (void);
110 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
112 static void pa_encode_section_info (tree, rtx, int);
113 static const char *pa_strip_name_encoding (const char *);
114 static bool pa_function_ok_for_sibcall (tree, tree);
115 static void pa_globalize_label (FILE *, const char *)
117 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
118 HOST_WIDE_INT, tree);
119 #if !defined(USE_COLLECT2)
120 static void pa_asm_out_constructor (rtx, int);
121 static void pa_asm_out_destructor (rtx, int);
123 static void pa_init_builtins (void);
124 static rtx hppa_builtin_saveregs (void);
125 static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
126 static bool pa_scalar_mode_supported_p (enum machine_mode);
127 static bool pa_commutative_p (rtx x, int outer_code);
128 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
129 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
130 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
131 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
132 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
134 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
135 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
136 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
137 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
139 static void output_deferred_plabels (void);
140 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
141 #ifdef ASM_OUTPUT_EXTERNAL_REAL
142 static void pa_hpux_file_end (void);
144 #ifdef HPUX_LONG_DOUBLE_LIBRARY
145 static void pa_hpux_init_libfuncs (void);
147 static rtx pa_struct_value_rtx (tree, int);
148 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
150 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
152 static struct machine_function * pa_init_machine_status (void);
153 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
155 secondary_reload_info *);
158 /* Save the operands last given to a compare for use when we
159 generate a scc or bcc insn. */
160 rtx hppa_compare_op0, hppa_compare_op1;
161 enum cmp_type hppa_branch_type;
163 /* Which cpu we are scheduling for. */
164 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
166 /* The UNIX standard to use for predefines and linking. */
167 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
169 /* Counts for the number of callee-saved general and floating point
170 registers which were saved by the current function's prologue. */
171 static int gr_saved, fr_saved;
173 static rtx find_addr_reg (rtx);
175 /* Keep track of the number of bytes we have output in the CODE subspace
176 during this compilation so we'll know when to emit inline long-calls. */
177 unsigned long total_code_bytes;
179 /* The last address of the previous function plus the number of bytes in
180 associated thunks that have been output. This is used to determine if
181 a thunk can use an IA-relative branch to reach its target function. */
182 static int last_address;
184 /* Variables to handle plabels that we discover are necessary at assembly
185 output time. They are output after the current function. */
186 struct deferred_plabel GTY(())
191 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
193 static size_t n_deferred_plabels = 0;
196 /* Initialize the GCC target structure. */
198 #undef TARGET_ASM_ALIGNED_HI_OP
199 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
200 #undef TARGET_ASM_ALIGNED_SI_OP
201 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
202 #undef TARGET_ASM_ALIGNED_DI_OP
203 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
204 #undef TARGET_ASM_UNALIGNED_HI_OP
205 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
206 #undef TARGET_ASM_UNALIGNED_SI_OP
207 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
208 #undef TARGET_ASM_UNALIGNED_DI_OP
209 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER pa_assemble_integer
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
215 #undef TARGET_ASM_FUNCTION_EPILOGUE
216 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
218 #undef TARGET_SCHED_ADJUST_COST
219 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
220 #undef TARGET_SCHED_ADJUST_PRIORITY
221 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
222 #undef TARGET_SCHED_ISSUE_RATE
223 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
225 #undef TARGET_ENCODE_SECTION_INFO
226 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
227 #undef TARGET_STRIP_NAME_ENCODING
228 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
230 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
231 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
233 #undef TARGET_COMMUTATIVE_P
234 #define TARGET_COMMUTATIVE_P pa_commutative_p
236 #undef TARGET_ASM_OUTPUT_MI_THUNK
237 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
238 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
239 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
241 #undef TARGET_ASM_FILE_END
242 #ifdef ASM_OUTPUT_EXTERNAL_REAL
243 #define TARGET_ASM_FILE_END pa_hpux_file_end
245 #define TARGET_ASM_FILE_END output_deferred_plabels
248 #if !defined(USE_COLLECT2)
249 #undef TARGET_ASM_CONSTRUCTOR
250 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
251 #undef TARGET_ASM_DESTRUCTOR
252 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
255 #undef TARGET_DEFAULT_TARGET_FLAGS
256 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
257 #undef TARGET_HANDLE_OPTION
258 #define TARGET_HANDLE_OPTION pa_handle_option
260 #undef TARGET_INIT_BUILTINS
261 #define TARGET_INIT_BUILTINS pa_init_builtins
263 #undef TARGET_RTX_COSTS
264 #define TARGET_RTX_COSTS hppa_rtx_costs
265 #undef TARGET_ADDRESS_COST
266 #define TARGET_ADDRESS_COST hppa_address_cost
268 #undef TARGET_MACHINE_DEPENDENT_REORG
269 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
271 #ifdef HPUX_LONG_DOUBLE_LIBRARY
272 #undef TARGET_INIT_LIBFUNCS
273 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
276 #undef TARGET_PROMOTE_FUNCTION_RETURN
277 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
278 #undef TARGET_PROMOTE_PROTOTYPES
279 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
281 #undef TARGET_STRUCT_VALUE_RTX
282 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
283 #undef TARGET_RETURN_IN_MEMORY
284 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
285 #undef TARGET_MUST_PASS_IN_STACK
286 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
287 #undef TARGET_PASS_BY_REFERENCE
288 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
289 #undef TARGET_CALLEE_COPIES
290 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
291 #undef TARGET_ARG_PARTIAL_BYTES
292 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
294 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
295 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
296 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
297 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
299 #undef TARGET_SCALAR_MODE_SUPPORTED_P
300 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
302 #undef TARGET_CANNOT_FORCE_CONST_MEM
303 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
305 #undef TARGET_SECONDARY_RELOAD
306 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
308 struct gcc_target targetm = TARGET_INITIALIZER;
310 /* Parse the -mfixed-range= option string. */
313 fix_range (const char *const_str)
316 char *str, *dash, *comma;
318 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
319 REG2 are either register names or register numbers. The effect
320 of this option is to mark the registers in the range from REG1 to
321 REG2 as ``fixed'' so they won't be used by the compiler. This is
322 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
324 i = strlen (const_str);
325 str = (char *) alloca (i + 1);
326 memcpy (str, const_str, i + 1);
330 dash = strchr (str, '-');
333 warning (0, "value of -mfixed-range must have form REG1-REG2");
338 comma = strchr (dash + 1, ',');
342 first = decode_reg_name (str);
345 warning (0, "unknown register name: %s", str);
349 last = decode_reg_name (dash + 1);
352 warning (0, "unknown register name: %s", dash + 1);
360 warning (0, "%s-%s is an empty range", str, dash + 1);
364 for (i = first; i <= last; ++i)
365 fixed_regs[i] = call_used_regs[i] = 1;
374 /* Check if all floating point registers have been fixed. */
375 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
380 target_flags |= MASK_DISABLE_FPREGS;
383 /* Implement TARGET_HANDLE_OPTION. */
386 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
391 case OPT_mpa_risc_1_0:
393 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
397 case OPT_mpa_risc_1_1:
399 target_flags &= ~MASK_PA_20;
400 target_flags |= MASK_PA_11;
403 case OPT_mpa_risc_2_0:
405 target_flags |= MASK_PA_11 | MASK_PA_20;
409 if (strcmp (arg, "8000") == 0)
410 pa_cpu = PROCESSOR_8000;
411 else if (strcmp (arg, "7100") == 0)
412 pa_cpu = PROCESSOR_7100;
413 else if (strcmp (arg, "700") == 0)
414 pa_cpu = PROCESSOR_700;
415 else if (strcmp (arg, "7100LC") == 0)
416 pa_cpu = PROCESSOR_7100LC;
417 else if (strcmp (arg, "7200") == 0)
418 pa_cpu = PROCESSOR_7200;
419 else if (strcmp (arg, "7300") == 0)
420 pa_cpu = PROCESSOR_7300;
425 case OPT_mfixed_range_:
435 #if TARGET_HPUX_10_10
441 #if TARGET_HPUX_11_11
453 override_options (void)
455 /* Unconditional branches in the delay slot are not compatible with dwarf2
456 call frame information. There is no benefit in using this optimization
457 on PA8000 and later processors. */
458 if (pa_cpu >= PROCESSOR_8000
459 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
460 || flag_unwind_tables)
461 target_flags &= ~MASK_JUMP_IN_DELAY;
463 if (flag_pic && TARGET_PORTABLE_RUNTIME)
465 warning (0, "PIC code generation is not supported in the portable runtime model");
468 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
470 warning (0, "PIC code generation is not compatible with fast indirect calls");
473 if (! TARGET_GAS && write_symbols != NO_DEBUG)
475 warning (0, "-g is only supported when using GAS on this processor,");
476 warning (0, "-g option disabled");
477 write_symbols = NO_DEBUG;
480 /* We only support the "big PIC" model now. And we always generate PIC
481 code when in 64bit mode. */
482 if (flag_pic == 1 || TARGET_64BIT)
485 /* We can't guarantee that .dword is available for 32-bit targets. */
486 if (UNITS_PER_WORD == 4)
487 targetm.asm_out.aligned_op.di = NULL;
489 /* The unaligned ops are only available when using GAS. */
492 targetm.asm_out.unaligned_op.hi = NULL;
493 targetm.asm_out.unaligned_op.si = NULL;
494 targetm.asm_out.unaligned_op.di = NULL;
497 init_machine_status = pa_init_machine_status;
501 pa_init_builtins (void)
503 #ifdef DONT_HAVE_FPUTC_UNLOCKED
504 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
505 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
506 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
507 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
511 /* Function to init struct machine_function.
512 This will be called, via a pointer variable,
513 from push_function_context. */
515 static struct machine_function *
516 pa_init_machine_status (void)
518 return ggc_alloc_cleared (sizeof (machine_function));
521 /* If FROM is a probable pointer register, mark TO as a probable
522 pointer register with the same pointer alignment as FROM. */
525 copy_reg_pointer (rtx to, rtx from)
527 if (REG_POINTER (from))
528 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
531 /* Return 1 if X contains a symbolic expression. We know these
532 expressions will have one of a few well defined forms, so
533 we need only check those forms. */
535 symbolic_expression_p (rtx x)
538 /* Strip off any HIGH. */
539 if (GET_CODE (x) == HIGH)
542 return (symbolic_operand (x, VOIDmode));
545 /* Accept any constant that can be moved in one instruction into a
548 cint_ok_for_move (HOST_WIDE_INT intval)
550 /* OK if ldo, ldil, or zdepi, can be used. */
551 return (CONST_OK_FOR_LETTER_P (intval, 'J')
552 || CONST_OK_FOR_LETTER_P (intval, 'N')
553 || CONST_OK_FOR_LETTER_P (intval, 'K'));
556 /* Return truth value of whether OP can be used as an operand in a
559 adddi3_operand (rtx op, enum machine_mode mode)
561 return (register_operand (op, mode)
562 || (GET_CODE (op) == CONST_INT
563 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
566 /* True iff zdepi can be used to generate this CONST_INT.
567 zdepi first sign extends a 5 bit signed number to a given field
568 length, then places this field anywhere in a zero. */
570 zdepi_cint_p (unsigned HOST_WIDE_INT x)
572 unsigned HOST_WIDE_INT lsb_mask, t;
574 /* This might not be obvious, but it's at least fast.
575 This function is critical; we don't have the time loops would take. */
577 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
578 /* Return true iff t is a power of two. */
579 return ((t & (t - 1)) == 0);
582 /* True iff depi or extru can be used to compute (reg & mask).
583 Accept bit pattern like these:
588 and_mask_p (unsigned HOST_WIDE_INT mask)
591 mask += mask & -mask;
592 return (mask & (mask - 1)) == 0;
595 /* True iff depi can be used to compute (reg | MASK). */
597 ior_mask_p (unsigned HOST_WIDE_INT mask)
599 mask += mask & -mask;
600 return (mask & (mask - 1)) == 0;
603 /* Legitimize PIC addresses. If the address is already
604 position-independent, we return ORIG. Newly generated
605 position-independent addresses go to REG. If we need more
606 than one register, we lose. */
609 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
613 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
615 /* Labels need special handling. */
616 if (pic_label_operand (orig, mode))
618 /* We do not want to go through the movXX expanders here since that
619 would create recursion.
621 Nor do we really want to call a generator for a named pattern
622 since that requires multiple patterns if we want to support
625 So instead we just emit the raw set, which avoids the movXX
626 expanders completely. */
627 mark_reg_pointer (reg, BITS_PER_UNIT);
628 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
629 current_function_uses_pic_offset_table = 1;
632 if (GET_CODE (orig) == SYMBOL_REF)
638 /* Before reload, allocate a temporary register for the intermediate
639 result. This allows the sequence to be deleted when the final
640 result is unused and the insns are trivially dead. */
641 tmp_reg = ((reload_in_progress || reload_completed)
642 ? reg : gen_reg_rtx (Pmode));
644 emit_move_insn (tmp_reg,
645 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
646 gen_rtx_HIGH (word_mode, orig)));
648 = gen_const_mem (Pmode,
649 gen_rtx_LO_SUM (Pmode, tmp_reg,
650 gen_rtx_UNSPEC (Pmode,
654 current_function_uses_pic_offset_table = 1;
655 mark_reg_pointer (reg, BITS_PER_UNIT);
656 insn = emit_move_insn (reg, pic_ref);
658 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
659 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));
663 else if (GET_CODE (orig) == CONST)
667 if (GET_CODE (XEXP (orig, 0)) == PLUS
668 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
672 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
674 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
675 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
676 base == reg ? 0 : reg);
678 if (GET_CODE (orig) == CONST_INT)
680 if (INT_14_BITS (orig))
681 return plus_constant (base, INTVAL (orig));
682 orig = force_reg (Pmode, orig);
684 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
685 /* Likewise, should we set special REG_NOTEs here? */
691 static GTY(()) rtx gen_tls_tga;
694 gen_tls_get_addr (void)
697 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
702 hppa_tls_call (rtx arg)
706 ret = gen_reg_rtx (Pmode);
707 emit_library_call_value (gen_tls_get_addr (), ret,
708 LCT_CONST, Pmode, 1, arg, Pmode);
714 legitimize_tls_address (rtx addr)
716 rtx ret, insn, tmp, t1, t2, tp;
717 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
721 case TLS_MODEL_GLOBAL_DYNAMIC:
722 tmp = gen_reg_rtx (Pmode);
723 emit_insn (gen_tgd_load (tmp, addr));
724 ret = hppa_tls_call (tmp);
727 case TLS_MODEL_LOCAL_DYNAMIC:
728 ret = gen_reg_rtx (Pmode);
729 tmp = gen_reg_rtx (Pmode);
731 emit_insn (gen_tld_load (tmp, addr));
732 t1 = hppa_tls_call (tmp);
735 t2 = gen_reg_rtx (Pmode);
736 emit_libcall_block (insn, t2, t1,
737 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
739 emit_insn (gen_tld_offset_load (ret, addr, t2));
742 case TLS_MODEL_INITIAL_EXEC:
743 tp = gen_reg_rtx (Pmode);
744 tmp = gen_reg_rtx (Pmode);
745 ret = gen_reg_rtx (Pmode);
746 emit_insn (gen_tp_load (tp));
747 emit_insn (gen_tie_load (tmp, addr));
748 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
751 case TLS_MODEL_LOCAL_EXEC:
752 tp = gen_reg_rtx (Pmode);
753 ret = gen_reg_rtx (Pmode);
754 emit_insn (gen_tp_load (tp));
755 emit_insn (gen_tle_load (ret, addr, tp));
765 /* Try machine-dependent ways of modifying an illegitimate address
766 to be legitimate. If we find one, return the new, valid address.
767 This macro is used in only one place: `memory_address' in explow.c.
769 OLDX is the address as it was before break_out_memory_refs was called.
770 In some cases it is useful to look at this to decide what needs to be done.
772 MODE and WIN are passed so that this macro can use
773 GO_IF_LEGITIMATE_ADDRESS.
775 It is always safe for this macro to do nothing. It exists to recognize
776 opportunities to optimize the output.
778 For the PA, transform:
780 memory(X + <large int>)
784 if (<large int> & mask) >= 16
785 Y = (<large int> & ~mask) + mask + 1 Round up.
787 Y = (<large int> & ~mask) Round down.
789 memory (Z + (<large int> - Y));
791 This is for CSE to find several similar references, and only use one Z.
793 X can either be a SYMBOL_REF or REG, but because combine cannot
794 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
795 D will not fit in 14 bits.
797 MODE_FLOAT references allow displacements which fit in 5 bits, so use
800 MODE_INT references allow displacements which fit in 14 bits, so use
803 This relies on the fact that most mode MODE_FLOAT references will use FP
804 registers and most mode MODE_INT references will use integer registers.
805 (In the rare case of an FP register used in an integer MODE, we depend
806 on secondary reloads to clean things up.)
809 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
810 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
811 addressing modes to be used).
813 Put X and Z into registers. Then put the entire expression into
817 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
818 enum machine_mode mode)
822 /* We need to canonicalize the order of operands in unscaled indexed
823 addresses since the code that checks if an address is valid doesn't
824 always try both orders. */
825 if (!TARGET_NO_SPACE_REGS
826 && GET_CODE (x) == PLUS
827 && GET_MODE (x) == Pmode
828 && REG_P (XEXP (x, 0))
829 && REG_P (XEXP (x, 1))
830 && REG_POINTER (XEXP (x, 0))
831 && !REG_POINTER (XEXP (x, 1)))
832 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
834 if (PA_SYMBOL_REF_TLS_P (x))
835 return legitimize_tls_address (x);
837 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
839 /* Strip off CONST. */
840 if (GET_CODE (x) == CONST)
843 /* Special case. Get the SYMBOL_REF into a register and use indexing.
844 That should always be safe. */
845 if (GET_CODE (x) == PLUS
846 && GET_CODE (XEXP (x, 0)) == REG
847 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
849 rtx reg = force_reg (Pmode, XEXP (x, 1));
850 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
853 /* Note we must reject symbols which represent function addresses
854 since the assembler/linker can't handle arithmetic on plabels. */
855 if (GET_CODE (x) == PLUS
856 && GET_CODE (XEXP (x, 1)) == CONST_INT
857 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
858 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
859 || GET_CODE (XEXP (x, 0)) == REG))
861 rtx int_part, ptr_reg;
863 int offset = INTVAL (XEXP (x, 1));
866 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
867 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
869 /* Choose which way to round the offset. Round up if we
870 are >= halfway to the next boundary. */
871 if ((offset & mask) >= ((mask + 1) / 2))
872 newoffset = (offset & ~ mask) + mask + 1;
874 newoffset = (offset & ~ mask);
876 /* If the newoffset will not fit in 14 bits (ldo), then
877 handling this would take 4 or 5 instructions (2 to load
878 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
879 add the new offset and the SYMBOL_REF.) Combine can
880 not handle 4->2 or 5->2 combinations, so do not create
882 if (! VAL_14_BITS_P (newoffset)
883 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
885 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
888 gen_rtx_HIGH (Pmode, const_part));
891 gen_rtx_LO_SUM (Pmode,
892 tmp_reg, const_part));
896 if (! VAL_14_BITS_P (newoffset))
897 int_part = force_reg (Pmode, GEN_INT (newoffset));
899 int_part = GEN_INT (newoffset);
901 ptr_reg = force_reg (Pmode,
903 force_reg (Pmode, XEXP (x, 0)),
906 return plus_constant (ptr_reg, offset - newoffset);
909 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
911 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
912 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
913 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
914 && (OBJECT_P (XEXP (x, 1))
915 || GET_CODE (XEXP (x, 1)) == SUBREG)
916 && GET_CODE (XEXP (x, 1)) != CONST)
918 int val = INTVAL (XEXP (XEXP (x, 0), 1));
922 if (GET_CODE (reg1) != REG)
923 reg1 = force_reg (Pmode, force_operand (reg1, 0));
925 reg2 = XEXP (XEXP (x, 0), 0);
926 if (GET_CODE (reg2) != REG)
927 reg2 = force_reg (Pmode, force_operand (reg2, 0));
929 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
936 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
938 Only do so for floating point modes since this is more speculative
939 and we lose if it's an integer store. */
940 if (GET_CODE (x) == PLUS
941 && GET_CODE (XEXP (x, 0)) == PLUS
942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
943 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
944 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
945 && (mode == SFmode || mode == DFmode))
948 /* First, try and figure out what to use as a base register. */
949 rtx reg1, reg2, base, idx, orig_base;
951 reg1 = XEXP (XEXP (x, 0), 1);
956 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
957 then emit_move_sequence will turn on REG_POINTER so we'll know
958 it's a base register below. */
959 if (GET_CODE (reg1) != REG)
960 reg1 = force_reg (Pmode, force_operand (reg1, 0));
962 if (GET_CODE (reg2) != REG)
963 reg2 = force_reg (Pmode, force_operand (reg2, 0));
965 /* Figure out what the base and index are. */
967 if (GET_CODE (reg1) == REG
968 && REG_POINTER (reg1))
971 orig_base = XEXP (XEXP (x, 0), 1);
972 idx = gen_rtx_PLUS (Pmode,
974 XEXP (XEXP (XEXP (x, 0), 0), 0),
975 XEXP (XEXP (XEXP (x, 0), 0), 1)),
978 else if (GET_CODE (reg2) == REG
979 && REG_POINTER (reg2))
982 orig_base = XEXP (x, 1);
989 /* If the index adds a large constant, try to scale the
990 constant so that it can be loaded with only one insn. */
991 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
992 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
993 / INTVAL (XEXP (XEXP (idx, 0), 1)))
994 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
996 /* Divide the CONST_INT by the scale factor, then add it to A. */
997 int val = INTVAL (XEXP (idx, 1));
999 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1000 reg1 = XEXP (XEXP (idx, 0), 0);
1001 if (GET_CODE (reg1) != REG)
1002 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1004 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1006 /* We can now generate a simple scaled indexed address. */
1009 (Pmode, gen_rtx_PLUS (Pmode,
1010 gen_rtx_MULT (Pmode, reg1,
1011 XEXP (XEXP (idx, 0), 1)),
1015 /* If B + C is still a valid base register, then add them. */
1016 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1017 && INTVAL (XEXP (idx, 1)) <= 4096
1018 && INTVAL (XEXP (idx, 1)) >= -4096)
1020 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1023 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1025 reg2 = XEXP (XEXP (idx, 0), 0);
1026 if (GET_CODE (reg2) != CONST_INT)
1027 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1029 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1030 gen_rtx_MULT (Pmode,
1036 /* Get the index into a register, then add the base + index and
1037 return a register holding the result. */
1039 /* First get A into a register. */
1040 reg1 = XEXP (XEXP (idx, 0), 0);
1041 if (GET_CODE (reg1) != REG)
1042 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1044 /* And get B into a register. */
1045 reg2 = XEXP (idx, 1);
1046 if (GET_CODE (reg2) != REG)
1047 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1049 reg1 = force_reg (Pmode,
1050 gen_rtx_PLUS (Pmode,
1051 gen_rtx_MULT (Pmode, reg1,
1052 XEXP (XEXP (idx, 0), 1)),
1055 /* Add the result to our base register and return. */
1056 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1060 /* Uh-oh. We might have an address for x[n-100000]. This needs
1061 special handling to avoid creating an indexed memory address
1062 with x-100000 as the base.
1064 If the constant part is small enough, then it's still safe because
1065 there is a guard page at the beginning and end of the data segment.
1067 Scaled references are common enough that we want to try and rearrange the
1068 terms so that we can use indexing for these addresses too. Only
1069 do the optimization for floatint point modes. */
1071 if (GET_CODE (x) == PLUS
1072 && symbolic_expression_p (XEXP (x, 1)))
1074 /* Ugly. We modify things here so that the address offset specified
1075 by the index expression is computed first, then added to x to form
1076 the entire address. */
1078 rtx regx1, regx2, regy1, regy2, y;
1080 /* Strip off any CONST. */
1082 if (GET_CODE (y) == CONST)
1085 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1087 /* See if this looks like
1088 (plus (mult (reg) (shadd_const))
1089 (const (plus (symbol_ref) (const_int))))
1091 Where const_int is small. In that case the const
1092 expression is a valid pointer for indexing.
1094 If const_int is big, but can be divided evenly by shadd_const
1095 and added to (reg). This allows more scaled indexed addresses. */
1096 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1097 && GET_CODE (XEXP (x, 0)) == MULT
1098 && GET_CODE (XEXP (y, 1)) == CONST_INT
1099 && INTVAL (XEXP (y, 1)) >= -4096
1100 && INTVAL (XEXP (y, 1)) <= 4095
1101 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1102 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1104 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1108 if (GET_CODE (reg1) != REG)
1109 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1111 reg2 = XEXP (XEXP (x, 0), 0);
1112 if (GET_CODE (reg2) != REG)
1113 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1115 return force_reg (Pmode,
1116 gen_rtx_PLUS (Pmode,
1117 gen_rtx_MULT (Pmode,
1122 else if ((mode == DFmode || mode == SFmode)
1123 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1124 && GET_CODE (XEXP (x, 0)) == MULT
1125 && GET_CODE (XEXP (y, 1)) == CONST_INT
1126 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1127 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1128 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1131 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1132 / INTVAL (XEXP (XEXP (x, 0), 1))));
1133 regx2 = XEXP (XEXP (x, 0), 0);
1134 if (GET_CODE (regx2) != REG)
1135 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1136 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1140 gen_rtx_PLUS (Pmode,
1141 gen_rtx_MULT (Pmode, regx2,
1142 XEXP (XEXP (x, 0), 1)),
1143 force_reg (Pmode, XEXP (y, 0))));
1145 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1146 && INTVAL (XEXP (y, 1)) >= -4096
1147 && INTVAL (XEXP (y, 1)) <= 4095)
1149 /* This is safe because of the guard page at the
1150 beginning and end of the data space. Just
1151 return the original address. */
1156 /* Doesn't look like one we can optimize. */
1157 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1158 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1159 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1160 regx1 = force_reg (Pmode,
1161 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1163 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1171 /* For the HPPA, REG and REG+CONST is cost 0
1172 and addresses involving symbolic constants are cost 2.
1174 PIC addresses are very expensive.
1176 It is no coincidence that this has the same structure
1177 as GO_IF_LEGITIMATE_ADDRESS. */
1180 hppa_address_cost (rtx X)
1182 switch (GET_CODE (X))
1195 /* Compute a (partial) cost for rtx X. Return true if the complete
1196 cost has been computed, and false if subexpressions should be
1197 scanned. In either case, *TOTAL contains the cost result. */
1200 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1205 if (INTVAL (x) == 0)
1207 else if (INT_14_BITS (x))
1224 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1225 && outer_code != SET)
1232 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1233 *total = COSTS_N_INSNS (3);
1234 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1235 *total = COSTS_N_INSNS (8);
1237 *total = COSTS_N_INSNS (20);
1241 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1243 *total = COSTS_N_INSNS (14);
1251 *total = COSTS_N_INSNS (60);
1254 case PLUS: /* this includes shNadd insns */
1256 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1257 *total = COSTS_N_INSNS (3);
1259 *total = COSTS_N_INSNS (1);
1265 *total = COSTS_N_INSNS (1);
1273 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1274 new rtx with the correct mode. */
1276 force_mode (enum machine_mode mode, rtx orig)
1278 if (mode == GET_MODE (orig))
1281 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1283 return gen_rtx_REG (mode, REGNO (orig));
1286 /* Return 1 if *X is a thread-local symbol. */
1289 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1291 return PA_SYMBOL_REF_TLS_P (*x);
1294 /* Return 1 if X contains a thread-local symbol. */
1297 pa_tls_referenced_p (rtx x)
1299 if (!TARGET_HAVE_TLS)
1302 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1305 /* Emit insns to move operands[1] into operands[0].
1307 Return 1 if we have written out everything that needs to be done to
1308 do the move. Otherwise, return 0 and the caller will emit the move
1311 Note SCRATCH_REG may not be in the proper mode depending on how it
1312 will be used. This routine is responsible for creating a new copy
1313 of SCRATCH_REG in the proper mode. */
1316 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1318 register rtx operand0 = operands[0];
1319 register rtx operand1 = operands[1];
1322 /* We can only handle indexed addresses in the destination operand
1323 of floating point stores. Thus, we need to break out indexed
1324 addresses from the destination operand. */
1325 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1327 /* This is only safe up to the beginning of life analysis. */
1328 gcc_assert (!no_new_pseudos);
1330 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1331 operand0 = replace_equiv_address (operand0, tem);
1334 /* On targets with non-equivalent space registers, break out unscaled
1335 indexed addresses from the source operand before the final CSE.
1336 We have to do this because the REG_POINTER flag is not correctly
1337 carried through various optimization passes and CSE may substitute
1338 a pseudo without the pointer set for one with the pointer set. As
1339 a result, we loose various opportunities to create insns with
1340 unscaled indexed addresses. */
1341 if (!TARGET_NO_SPACE_REGS
1342 && !cse_not_expected
1343 && GET_CODE (operand1) == MEM
1344 && GET_CODE (XEXP (operand1, 0)) == PLUS
1345 && REG_P (XEXP (XEXP (operand1, 0), 0))
1346 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1348 = replace_equiv_address (operand1,
1349 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1352 && reload_in_progress && GET_CODE (operand0) == REG
1353 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1354 operand0 = reg_equiv_mem[REGNO (operand0)];
1355 else if (scratch_reg
1356 && reload_in_progress && GET_CODE (operand0) == SUBREG
1357 && GET_CODE (SUBREG_REG (operand0)) == REG
1358 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1360 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1361 the code which tracks sets/uses for delete_output_reload. */
1362 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1363 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1364 SUBREG_BYTE (operand0));
1365 operand0 = alter_subreg (&temp);
1369 && reload_in_progress && GET_CODE (operand1) == REG
1370 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1371 operand1 = reg_equiv_mem[REGNO (operand1)];
1372 else if (scratch_reg
1373 && reload_in_progress && GET_CODE (operand1) == SUBREG
1374 && GET_CODE (SUBREG_REG (operand1)) == REG
1375 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1377 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1378 the code which tracks sets/uses for delete_output_reload. */
1379 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1380 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1381 SUBREG_BYTE (operand1));
1382 operand1 = alter_subreg (&temp);
1385 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1386 && ((tem = find_replacement (&XEXP (operand0, 0)))
1387 != XEXP (operand0, 0)))
1388 operand0 = replace_equiv_address (operand0, tem);
1390 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1391 && ((tem = find_replacement (&XEXP (operand1, 0)))
1392 != XEXP (operand1, 0)))
1393 operand1 = replace_equiv_address (operand1, tem);
1395 /* Handle secondary reloads for loads/stores of FP registers from
1396 REG+D addresses where D does not fit in 5 or 14 bits, including
1397 (subreg (mem (addr))) cases. */
1399 && fp_reg_operand (operand0, mode)
1400 && ((GET_CODE (operand1) == MEM
1401 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1402 XEXP (operand1, 0)))
1403 || ((GET_CODE (operand1) == SUBREG
1404 && GET_CODE (XEXP (operand1, 0)) == MEM
1405 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1407 XEXP (XEXP (operand1, 0), 0))))))
1409 if (GET_CODE (operand1) == SUBREG)
1410 operand1 = XEXP (operand1, 0);
1412 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1413 it in WORD_MODE regardless of what mode it was originally given
1415 scratch_reg = force_mode (word_mode, scratch_reg);
1417 /* D might not fit in 14 bits either; for such cases load D into
1419 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1421 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1422 emit_move_insn (scratch_reg,
1423 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1425 XEXP (XEXP (operand1, 0), 0),
1429 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1430 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1431 replace_equiv_address (operand1, scratch_reg)));
1434 else if (scratch_reg
1435 && fp_reg_operand (operand1, mode)
1436 && ((GET_CODE (operand0) == MEM
1437 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1439 XEXP (operand0, 0)))
1440 || ((GET_CODE (operand0) == SUBREG)
1441 && GET_CODE (XEXP (operand0, 0)) == MEM
1442 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1444 XEXP (XEXP (operand0, 0), 0)))))
1446 if (GET_CODE (operand0) == SUBREG)
1447 operand0 = XEXP (operand0, 0);
1449 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1450 it in WORD_MODE regardless of what mode it was originally given
1452 scratch_reg = force_mode (word_mode, scratch_reg);
1454 /* D might not fit in 14 bits either; for such cases load D into
1456 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1458 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1459 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1462 XEXP (XEXP (operand0, 0),
1467 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1468 emit_insn (gen_rtx_SET (VOIDmode,
1469 replace_equiv_address (operand0, scratch_reg),
1473 /* Handle secondary reloads for loads of FP registers from constant
1474 expressions by forcing the constant into memory.
1476 Use scratch_reg to hold the address of the memory location.
1478 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1479 NO_REGS when presented with a const_int and a register class
1480 containing only FP registers. Doing so unfortunately creates
1481 more problems than it solves. Fix this for 2.5. */
1482 else if (scratch_reg
1483 && CONSTANT_P (operand1)
1484 && fp_reg_operand (operand0, mode))
1486 rtx const_mem, xoperands[2];
1488 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1489 it in WORD_MODE regardless of what mode it was originally given
1491 scratch_reg = force_mode (word_mode, scratch_reg);
1493 /* Force the constant into memory and put the address of the
1494 memory location into scratch_reg. */
1495 const_mem = force_const_mem (mode, operand1);
1496 xoperands[0] = scratch_reg;
1497 xoperands[1] = XEXP (const_mem, 0);
1498 emit_move_sequence (xoperands, Pmode, 0);
1500 /* Now load the destination register. */
1501 emit_insn (gen_rtx_SET (mode, operand0,
1502 replace_equiv_address (const_mem, scratch_reg)));
1505 /* Handle secondary reloads for SAR. These occur when trying to load
1506 the SAR from memory, FP register, or with a constant. */
1507 else if (scratch_reg
1508 && GET_CODE (operand0) == REG
1509 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1510 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1511 && (GET_CODE (operand1) == MEM
1512 || GET_CODE (operand1) == CONST_INT
1513 || (GET_CODE (operand1) == REG
1514 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1516 /* D might not fit in 14 bits either; for such cases load D into
1518 if (GET_CODE (operand1) == MEM
1519 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1521 /* We are reloading the address into the scratch register, so we
1522 want to make sure the scratch register is a full register. */
1523 scratch_reg = force_mode (word_mode, scratch_reg);
1525 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1526 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1529 XEXP (XEXP (operand1, 0),
1533 /* Now we are going to load the scratch register from memory,
1534 we want to load it in the same width as the original MEM,
1535 which must be the same as the width of the ultimate destination,
1537 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1539 emit_move_insn (scratch_reg,
1540 replace_equiv_address (operand1, scratch_reg));
1544 /* We want to load the scratch register using the same mode as
1545 the ultimate destination. */
1546 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1548 emit_move_insn (scratch_reg, operand1);
1551 /* And emit the insn to set the ultimate destination. We know that
1552 the scratch register has the same mode as the destination at this
1554 emit_move_insn (operand0, scratch_reg);
1557 /* Handle the most common case: storing into a register. */
1558 else if (register_operand (operand0, mode))
1560 if (register_operand (operand1, mode)
1561 || (GET_CODE (operand1) == CONST_INT
1562 && cint_ok_for_move (INTVAL (operand1)))
1563 || (operand1 == CONST0_RTX (mode))
1564 || (GET_CODE (operand1) == HIGH
1565 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1566 /* Only `general_operands' can come here, so MEM is ok. */
1567 || GET_CODE (operand1) == MEM)
1569 /* Various sets are created during RTL generation which don't
1570 have the REG_POINTER flag correctly set. After the CSE pass,
1571 instruction recognition can fail if we don't consistently
1572 set this flag when performing register copies. This should
1573 also improve the opportunities for creating insns that use
1574 unscaled indexing. */
1575 if (REG_P (operand0) && REG_P (operand1))
1577 if (REG_POINTER (operand1)
1578 && !REG_POINTER (operand0)
1579 && !HARD_REGISTER_P (operand0))
1580 copy_reg_pointer (operand0, operand1);
1581 else if (REG_POINTER (operand0)
1582 && !REG_POINTER (operand1)
1583 && !HARD_REGISTER_P (operand1))
1584 copy_reg_pointer (operand1, operand0);
1587 /* When MEMs are broken out, the REG_POINTER flag doesn't
1588 get set. In some cases, we can set the REG_POINTER flag
1589 from the declaration for the MEM. */
1590 if (REG_P (operand0)
1591 && GET_CODE (operand1) == MEM
1592 && !REG_POINTER (operand0))
1594 tree decl = MEM_EXPR (operand1);
1596 /* Set the register pointer flag and register alignment
1597 if the declaration for this memory reference is a
1598 pointer type. Fortran indirect argument references
1601 && !(flag_argument_noalias > 1
1602 && TREE_CODE (decl) == INDIRECT_REF
1603 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1607 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1609 if (TREE_CODE (decl) == COMPONENT_REF)
1610 decl = TREE_OPERAND (decl, 1);
1612 type = TREE_TYPE (decl);
1613 if (TREE_CODE (type) == ARRAY_TYPE)
1614 type = get_inner_array_type (type);
1616 if (POINTER_TYPE_P (type))
1620 type = TREE_TYPE (type);
1621 /* Using TYPE_ALIGN_OK is rather conservative as
1622 only the ada frontend actually sets it. */
1623 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1625 mark_reg_pointer (operand0, align);
1630 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1634 else if (GET_CODE (operand0) == MEM)
1636 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1637 && !(reload_in_progress || reload_completed))
1639 rtx temp = gen_reg_rtx (DFmode);
1641 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1642 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1645 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1647 /* Run this case quickly. */
1648 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1651 if (! (reload_in_progress || reload_completed))
1653 operands[0] = validize_mem (operand0);
1654 operands[1] = operand1 = force_reg (mode, operand1);
1658 /* Simplify the source if we need to.
1659 Note we do have to handle function labels here, even though we do
1660 not consider them legitimate constants. Loop optimizations can
1661 call the emit_move_xxx with one as a source. */
1662 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1663 || function_label_operand (operand1, mode)
1664 || (GET_CODE (operand1) == HIGH
1665 && symbolic_operand (XEXP (operand1, 0), mode)))
1669 if (GET_CODE (operand1) == HIGH)
1672 operand1 = XEXP (operand1, 0);
1674 if (symbolic_operand (operand1, mode))
1676 /* Argh. The assembler and linker can't handle arithmetic
1679 So we force the plabel into memory, load operand0 from
1680 the memory location, then add in the constant part. */
1681 if ((GET_CODE (operand1) == CONST
1682 && GET_CODE (XEXP (operand1, 0)) == PLUS
1683 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1684 || function_label_operand (operand1, mode))
1686 rtx temp, const_part;
1688 /* Figure out what (if any) scratch register to use. */
1689 if (reload_in_progress || reload_completed)
1691 scratch_reg = scratch_reg ? scratch_reg : operand0;
1692 /* SCRATCH_REG will hold an address and maybe the actual
1693 data. We want it in WORD_MODE regardless of what mode it
1694 was originally given to us. */
1695 scratch_reg = force_mode (word_mode, scratch_reg);
1698 scratch_reg = gen_reg_rtx (Pmode);
1700 if (GET_CODE (operand1) == CONST)
1702 /* Save away the constant part of the expression. */
1703 const_part = XEXP (XEXP (operand1, 0), 1);
1704 gcc_assert (GET_CODE (const_part) == CONST_INT);
1706 /* Force the function label into memory. */
1707 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1711 /* No constant part. */
1712 const_part = NULL_RTX;
1714 /* Force the function label into memory. */
1715 temp = force_const_mem (mode, operand1);
1719 /* Get the address of the memory location. PIC-ify it if
1721 temp = XEXP (temp, 0);
1723 temp = legitimize_pic_address (temp, mode, scratch_reg);
1725 /* Put the address of the memory location into our destination
1728 emit_move_sequence (operands, mode, scratch_reg);
1730 /* Now load from the memory location into our destination
1732 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1733 emit_move_sequence (operands, mode, scratch_reg);
1735 /* And add back in the constant part. */
1736 if (const_part != NULL_RTX)
1737 expand_inc (operand0, const_part);
1746 if (reload_in_progress || reload_completed)
1748 temp = scratch_reg ? scratch_reg : operand0;
1749 /* TEMP will hold an address and maybe the actual
1750 data. We want it in WORD_MODE regardless of what mode it
1751 was originally given to us. */
1752 temp = force_mode (word_mode, temp);
1755 temp = gen_reg_rtx (Pmode);
1757 /* (const (plus (symbol) (const_int))) must be forced to
1758 memory during/after reload if the const_int will not fit
1760 if (GET_CODE (operand1) == CONST
1761 && GET_CODE (XEXP (operand1, 0)) == PLUS
1762 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1763 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1764 && (reload_completed || reload_in_progress)
1767 rtx const_mem = force_const_mem (mode, operand1);
1768 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1770 operands[1] = replace_equiv_address (const_mem, operands[1]);
1771 emit_move_sequence (operands, mode, temp);
1775 operands[1] = legitimize_pic_address (operand1, mode, temp);
1776 if (REG_P (operand0) && REG_P (operands[1]))
1777 copy_reg_pointer (operand0, operands[1]);
1778 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1781 /* On the HPPA, references to data space are supposed to use dp,
1782 register 27, but showing it in the RTL inhibits various cse
1783 and loop optimizations. */
1788 if (reload_in_progress || reload_completed)
1790 temp = scratch_reg ? scratch_reg : operand0;
1791 /* TEMP will hold an address and maybe the actual
1792 data. We want it in WORD_MODE regardless of what mode it
1793 was originally given to us. */
1794 temp = force_mode (word_mode, temp);
1797 temp = gen_reg_rtx (mode);
1799 /* Loading a SYMBOL_REF into a register makes that register
1800 safe to be used as the base in an indexed address.
1802 Don't mark hard registers though. That loses. */
1803 if (GET_CODE (operand0) == REG
1804 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1805 mark_reg_pointer (operand0, BITS_PER_UNIT);
1806 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1807 mark_reg_pointer (temp, BITS_PER_UNIT);
1810 set = gen_rtx_SET (mode, operand0, temp);
1812 set = gen_rtx_SET (VOIDmode,
1814 gen_rtx_LO_SUM (mode, temp, operand1));
1816 emit_insn (gen_rtx_SET (VOIDmode,
1818 gen_rtx_HIGH (mode, operand1)));
1824 else if (pa_tls_referenced_p (operand1))
1829 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1831 addend = XEXP (XEXP (tmp, 0), 1);
1832 tmp = XEXP (XEXP (tmp, 0), 0);
1835 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1836 tmp = legitimize_tls_address (tmp);
1839 tmp = gen_rtx_PLUS (mode, tmp, addend);
1840 tmp = force_operand (tmp, operands[0]);
1844 else if (GET_CODE (operand1) != CONST_INT
1845 || !cint_ok_for_move (INTVAL (operand1)))
1849 HOST_WIDE_INT value = 0;
1850 HOST_WIDE_INT insv = 0;
1853 if (GET_CODE (operand1) == CONST_INT)
1854 value = INTVAL (operand1);
1857 && GET_CODE (operand1) == CONST_INT
1858 && HOST_BITS_PER_WIDE_INT > 32
1859 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1863 /* Extract the low order 32 bits of the value and sign extend.
1864 If the new value is the same as the original value, we can
1865 can use the original value as-is. If the new value is
1866 different, we use it and insert the most-significant 32-bits
1867 of the original value into the final result. */
1868 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1869 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1872 #if HOST_BITS_PER_WIDE_INT > 32
1873 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1877 operand1 = GEN_INT (nval);
1881 if (reload_in_progress || reload_completed)
1882 temp = scratch_reg ? scratch_reg : operand0;
1884 temp = gen_reg_rtx (mode);
1886 /* We don't directly split DImode constants on 32-bit targets
1887 because PLUS uses an 11-bit immediate and the insn sequence
1888 generated is not as efficient as the one using HIGH/LO_SUM. */
1889 if (GET_CODE (operand1) == CONST_INT
1890 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1893 /* Directly break constant into high and low parts. This
1894 provides better optimization opportunities because various
1895 passes recognize constants split with PLUS but not LO_SUM.
1896 We use a 14-bit signed low part except when the addition
1897 of 0x4000 to the high part might change the sign of the
1899 HOST_WIDE_INT low = value & 0x3fff;
1900 HOST_WIDE_INT high = value & ~ 0x3fff;
1904 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1912 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1913 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1917 emit_insn (gen_rtx_SET (VOIDmode, temp,
1918 gen_rtx_HIGH (mode, operand1)));
1919 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1922 insn = emit_move_insn (operands[0], operands[1]);
1924 /* Now insert the most significant 32 bits of the value
1925 into the register. When we don't have a second register
1926 available, it could take up to nine instructions to load
1927 a 64-bit integer constant. Prior to reload, we force
1928 constants that would take more than three instructions
1929 to load to the constant pool. During and after reload,
1930 we have to handle all possible values. */
1933 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1934 register and the value to be inserted is outside the
1935 range that can be loaded with three depdi instructions. */
1936 if (temp != operand0 && (insv >= 16384 || insv < -16384))
1938 operand1 = GEN_INT (insv);
1940 emit_insn (gen_rtx_SET (VOIDmode, temp,
1941 gen_rtx_HIGH (mode, operand1)));
1942 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1943 emit_insn (gen_insv (operand0, GEN_INT (32),
1948 int len = 5, pos = 27;
1950 /* Insert the bits using the depdi instruction. */
1953 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
1954 HOST_WIDE_INT sign = v5 < 0;
1956 /* Left extend the insertion. */
1957 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
1958 while (pos > 0 && (insv & 1) == sign)
1960 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
1965 emit_insn (gen_insv (operand0, GEN_INT (len),
1966 GEN_INT (pos), GEN_INT (v5)));
1968 len = pos > 0 && pos < 5 ? pos : 5;
1975 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
1980 /* Now have insn-emit do whatever it normally does. */
1984 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1985 it will need a link/runtime reloc). */
1988 reloc_needed (tree exp)
1992 switch (TREE_CODE (exp))
1999 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2000 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2005 case NON_LVALUE_EXPR:
2006 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2012 unsigned HOST_WIDE_INT ix;
2014 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2016 reloc |= reloc_needed (value);
2029 /* Does operand (which is a symbolic_operand) live in text space?
2030 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2034 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2036 if (GET_CODE (operand) == CONST)
2037 operand = XEXP (XEXP (operand, 0), 0);
2040 if (GET_CODE (operand) == SYMBOL_REF)
2041 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2045 if (GET_CODE (operand) == SYMBOL_REF)
2046 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2052 /* Return the best assembler insn template
2053 for moving operands[1] into operands[0] as a fullword. */
2055 singlemove_string (rtx *operands)
2057 HOST_WIDE_INT intval;
2059 if (GET_CODE (operands[0]) == MEM)
2060 return "stw %r1,%0";
2061 if (GET_CODE (operands[1]) == MEM)
2063 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2068 gcc_assert (GET_MODE (operands[1]) == SFmode);
2070 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2072 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2073 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2075 operands[1] = GEN_INT (i);
2076 /* Fall through to CONST_INT case. */
2078 if (GET_CODE (operands[1]) == CONST_INT)
2080 intval = INTVAL (operands[1]);
2082 if (VAL_14_BITS_P (intval))
2084 else if ((intval & 0x7ff) == 0)
2085 return "ldil L'%1,%0";
2086 else if (zdepi_cint_p (intval))
2087 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2089 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2091 return "copy %1,%0";
2095 /* Compute position (in OP[1]) and width (in OP[2])
2096 useful for copying IMM to a register using the zdepi
2097 instructions. Store the immediate value to insert in OP[0]. */
2099 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2103 /* Find the least significant set bit in IMM. */
2104 for (lsb = 0; lsb < 32; lsb++)
2111 /* Choose variants based on *sign* of the 5-bit field. */
2112 if ((imm & 0x10) == 0)
2113 len = (lsb <= 28) ? 4 : 32 - lsb;
2116 /* Find the width of the bitstring in IMM. */
2117 for (len = 5; len < 32; len++)
2119 if ((imm & (1 << len)) == 0)
2123 /* Sign extend IMM as a 5-bit value. */
2124 imm = (imm & 0xf) - 0x10;
2132 /* Compute position (in OP[1]) and width (in OP[2])
2133 useful for copying IMM to a register using the depdi,z
2134 instructions. Store the immediate value to insert in OP[0]. */
2136 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2138 HOST_WIDE_INT lsb, len;
2140 /* Find the least significant set bit in IMM. */
2141 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2148 /* Choose variants based on *sign* of the 5-bit field. */
2149 if ((imm & 0x10) == 0)
2150 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2151 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2154 /* Find the width of the bitstring in IMM. */
2155 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2157 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2161 /* Sign extend IMM as a 5-bit value. */
2162 imm = (imm & 0xf) - 0x10;
2170 /* Output assembler code to perform a doubleword move insn
2171 with operands OPERANDS. */
2174 output_move_double (rtx *operands)
2176 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2178 rtx addreg0 = 0, addreg1 = 0;
2180 /* First classify both operands. */
2182 if (REG_P (operands[0]))
2184 else if (offsettable_memref_p (operands[0]))
2186 else if (GET_CODE (operands[0]) == MEM)
2191 if (REG_P (operands[1]))
2193 else if (CONSTANT_P (operands[1]))
2195 else if (offsettable_memref_p (operands[1]))
2197 else if (GET_CODE (operands[1]) == MEM)
2202 /* Check for the cases that the operand constraints are not
2203 supposed to allow to happen. */
2204 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2206 /* Handle auto decrementing and incrementing loads and stores
2207 specifically, since the structure of the function doesn't work
2208 for them without major modification. Do it better when we learn
2209 this port about the general inc/dec addressing of PA.
2210 (This was written by tege. Chide him if it doesn't work.) */
2212 if (optype0 == MEMOP)
2214 /* We have to output the address syntax ourselves, since print_operand
2215 doesn't deal with the addresses we want to use. Fix this later. */
2217 rtx addr = XEXP (operands[0], 0);
2218 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2220 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2222 operands[0] = XEXP (addr, 0);
2223 gcc_assert (GET_CODE (operands[1]) == REG
2224 && GET_CODE (operands[0]) == REG);
2226 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2228 /* No overlap between high target register and address
2229 register. (We do this in a non-obvious way to
2230 save a register file writeback) */
2231 if (GET_CODE (addr) == POST_INC)
2232 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2233 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2235 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2237 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2239 operands[0] = XEXP (addr, 0);
2240 gcc_assert (GET_CODE (operands[1]) == REG
2241 && GET_CODE (operands[0]) == REG);
2243 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2244 /* No overlap between high target register and address
2245 register. (We do this in a non-obvious way to save a
2246 register file writeback) */
2247 if (GET_CODE (addr) == PRE_INC)
2248 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2249 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2252 if (optype1 == MEMOP)
2254 /* We have to output the address syntax ourselves, since print_operand
2255 doesn't deal with the addresses we want to use. Fix this later. */
2257 rtx addr = XEXP (operands[1], 0);
2258 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2260 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2262 operands[1] = XEXP (addr, 0);
2263 gcc_assert (GET_CODE (operands[0]) == REG
2264 && GET_CODE (operands[1]) == REG);
2266 if (!reg_overlap_mentioned_p (high_reg, addr))
2268 /* No overlap between high target register and address
2269 register. (We do this in a non-obvious way to
2270 save a register file writeback) */
2271 if (GET_CODE (addr) == POST_INC)
2272 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2273 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2277 /* This is an undefined situation. We should load into the
2278 address register *and* update that register. Probably
2279 we don't need to handle this at all. */
2280 if (GET_CODE (addr) == POST_INC)
2281 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2282 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2285 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2287 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2289 operands[1] = XEXP (addr, 0);
2290 gcc_assert (GET_CODE (operands[0]) == REG
2291 && GET_CODE (operands[1]) == REG);
2293 if (!reg_overlap_mentioned_p (high_reg, addr))
2295 /* No overlap between high target register and address
2296 register. (We do this in a non-obvious way to
2297 save a register file writeback) */
2298 if (GET_CODE (addr) == PRE_INC)
2299 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2300 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2304 /* This is an undefined situation. We should load into the
2305 address register *and* update that register. Probably
2306 we don't need to handle this at all. */
2307 if (GET_CODE (addr) == PRE_INC)
2308 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2309 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2312 else if (GET_CODE (addr) == PLUS
2313 && GET_CODE (XEXP (addr, 0)) == MULT)
2315 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2317 if (!reg_overlap_mentioned_p (high_reg, addr))
2321 xoperands[0] = high_reg;
2322 xoperands[1] = XEXP (addr, 1);
2323 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2324 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2325 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2327 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2333 xoperands[0] = high_reg;
2334 xoperands[1] = XEXP (addr, 1);
2335 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2336 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2337 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2339 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2344 /* If an operand is an unoffsettable memory ref, find a register
2345 we can increment temporarily to make it refer to the second word. */
2347 if (optype0 == MEMOP)
2348 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2350 if (optype1 == MEMOP)
2351 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2353 /* Ok, we can do one word at a time.
2354 Normally we do the low-numbered word first.
2356 In either case, set up in LATEHALF the operands to use
2357 for the high-numbered word and in some cases alter the
2358 operands in OPERANDS to be suitable for the low-numbered word. */
2360 if (optype0 == REGOP)
2361 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2362 else if (optype0 == OFFSOP)
2363 latehalf[0] = adjust_address (operands[0], SImode, 4);
2365 latehalf[0] = operands[0];
2367 if (optype1 == REGOP)
2368 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2369 else if (optype1 == OFFSOP)
2370 latehalf[1] = adjust_address (operands[1], SImode, 4);
2371 else if (optype1 == CNSTOP)
2372 split_double (operands[1], &operands[1], &latehalf[1]);
2374 latehalf[1] = operands[1];
2376 /* If the first move would clobber the source of the second one,
2377 do them in the other order.
2379 This can happen in two cases:
2381 mem -> register where the first half of the destination register
2382 is the same register used in the memory's address. Reload
2383 can create such insns.
2385 mem in this case will be either register indirect or register
2386 indirect plus a valid offset.
2388 register -> register move where REGNO(dst) == REGNO(src + 1)
2389 someone (Tim/Tege?) claimed this can happen for parameter loads.
2391 Handle mem -> register case first. */
2392 if (optype0 == REGOP
2393 && (optype1 == MEMOP || optype1 == OFFSOP)
2394 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2397 /* Do the late half first. */
2399 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2400 output_asm_insn (singlemove_string (latehalf), latehalf);
2404 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2405 return singlemove_string (operands);
2408 /* Now handle register -> register case. */
2409 if (optype0 == REGOP && optype1 == REGOP
2410 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2412 output_asm_insn (singlemove_string (latehalf), latehalf);
2413 return singlemove_string (operands);
2416 /* Normal case: do the two words, low-numbered first. */
2418 output_asm_insn (singlemove_string (operands), operands);
2420 /* Make any unoffsettable addresses point at high-numbered word. */
2422 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2424 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2427 output_asm_insn (singlemove_string (latehalf), latehalf);
2429 /* Undo the adds we just did. */
2431 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2433 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2439 output_fp_move_double (rtx *operands)
2441 if (FP_REG_P (operands[0]))
2443 if (FP_REG_P (operands[1])
2444 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2445 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2447 output_asm_insn ("fldd%F1 %1,%0", operands);
2449 else if (FP_REG_P (operands[1]))
2451 output_asm_insn ("fstd%F0 %1,%0", operands);
2457 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2459 /* This is a pain. You have to be prepared to deal with an
2460 arbitrary address here including pre/post increment/decrement.
2462 so avoid this in the MD. */
2463 gcc_assert (GET_CODE (operands[0]) == REG);
2465 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2466 xoperands[0] = operands[0];
2467 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2472 /* Return a REG that occurs in ADDR with coefficient 1.
2473 ADDR can be effectively incremented by incrementing REG. */
2476 find_addr_reg (rtx addr)
2478 while (GET_CODE (addr) == PLUS)
2480 if (GET_CODE (XEXP (addr, 0)) == REG)
2481 addr = XEXP (addr, 0);
2482 else if (GET_CODE (XEXP (addr, 1)) == REG)
2483 addr = XEXP (addr, 1);
2484 else if (CONSTANT_P (XEXP (addr, 0)))
2485 addr = XEXP (addr, 1);
2486 else if (CONSTANT_P (XEXP (addr, 1)))
2487 addr = XEXP (addr, 0);
2491 gcc_assert (GET_CODE (addr) == REG);
2495 /* Emit code to perform a block move.
2497 OPERANDS[0] is the destination pointer as a REG, clobbered.
2498 OPERANDS[1] is the source pointer as a REG, clobbered.
2499 OPERANDS[2] is a register for temporary storage.
2500 OPERANDS[3] is a register for temporary storage.
2501 OPERANDS[4] is the size as a CONST_INT
2502 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2503 OPERANDS[6] is another temporary register. */
2506 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2508 int align = INTVAL (operands[5]);
2509 unsigned long n_bytes = INTVAL (operands[4]);
2511 /* We can't move more than a word at a time because the PA
2512 has no longer integer move insns. (Could use fp mem ops?) */
2513 if (align > (TARGET_64BIT ? 8 : 4))
2514 align = (TARGET_64BIT ? 8 : 4);
2516 /* Note that we know each loop below will execute at least twice
2517 (else we would have open-coded the copy). */
2521 /* Pre-adjust the loop counter. */
2522 operands[4] = GEN_INT (n_bytes - 16);
2523 output_asm_insn ("ldi %4,%2", operands);
2526 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2527 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2528 output_asm_insn ("std,ma %3,8(%0)", operands);
2529 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2530 output_asm_insn ("std,ma %6,8(%0)", operands);
2532 /* Handle the residual. There could be up to 7 bytes of
2533 residual to copy! */
2534 if (n_bytes % 16 != 0)
2536 operands[4] = GEN_INT (n_bytes % 8);
2537 if (n_bytes % 16 >= 8)
2538 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2539 if (n_bytes % 8 != 0)
2540 output_asm_insn ("ldd 0(%1),%6", operands);
2541 if (n_bytes % 16 >= 8)
2542 output_asm_insn ("std,ma %3,8(%0)", operands);
2543 if (n_bytes % 8 != 0)
2544 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2549 /* Pre-adjust the loop counter. */
2550 operands[4] = GEN_INT (n_bytes - 8);
2551 output_asm_insn ("ldi %4,%2", operands);
2554 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2555 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2556 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2557 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2558 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2560 /* Handle the residual. There could be up to 7 bytes of
2561 residual to copy! */
2562 if (n_bytes % 8 != 0)
2564 operands[4] = GEN_INT (n_bytes % 4);
2565 if (n_bytes % 8 >= 4)
2566 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2567 if (n_bytes % 4 != 0)
2568 output_asm_insn ("ldw 0(%1),%6", operands);
2569 if (n_bytes % 8 >= 4)
2570 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2571 if (n_bytes % 4 != 0)
2572 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2577 /* Pre-adjust the loop counter. */
2578 operands[4] = GEN_INT (n_bytes - 4);
2579 output_asm_insn ("ldi %4,%2", operands);
2582 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2583 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2584 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2585 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2586 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2588 /* Handle the residual. */
2589 if (n_bytes % 4 != 0)
2591 if (n_bytes % 4 >= 2)
2592 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2593 if (n_bytes % 2 != 0)
2594 output_asm_insn ("ldb 0(%1),%6", operands);
2595 if (n_bytes % 4 >= 2)
2596 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2597 if (n_bytes % 2 != 0)
2598 output_asm_insn ("stb %6,0(%0)", operands);
2603 /* Pre-adjust the loop counter. */
2604 operands[4] = GEN_INT (n_bytes - 2);
2605 output_asm_insn ("ldi %4,%2", operands);
2608 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2609 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2610 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2611 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2612 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2614 /* Handle the residual. */
2615 if (n_bytes % 2 != 0)
2617 output_asm_insn ("ldb 0(%1),%3", operands);
2618 output_asm_insn ("stb %3,0(%0)", operands);
2627 /* Count the number of insns necessary to handle this block move.
2629 Basic structure is the same as emit_block_move, except that we
2630 count insns rather than emit them. */
2633 compute_movmem_length (rtx insn)
2635 rtx pat = PATTERN (insn);
2636 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2637 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2638 unsigned int n_insns = 0;
2640 /* We can't move more than four bytes at a time because the PA
2641 has no longer integer move insns. (Could use fp mem ops?) */
2642 if (align > (TARGET_64BIT ? 8 : 4))
2643 align = (TARGET_64BIT ? 8 : 4);
2645 /* The basic copying loop. */
2649 if (n_bytes % (2 * align) != 0)
2651 if ((n_bytes % (2 * align)) >= align)
2654 if ((n_bytes % align) != 0)
2658 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2662 /* Emit code to perform a block clear.
2664 OPERANDS[0] is the destination pointer as a REG, clobbered.
2665 OPERANDS[1] is a register for temporary storage.
2666 OPERANDS[2] is the size as a CONST_INT
2667 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2670 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2672 int align = INTVAL (operands[3]);
2673 unsigned long n_bytes = INTVAL (operands[2]);
2675 /* We can't clear more than a word at a time because the PA
2676 has no longer integer move insns. */
2677 if (align > (TARGET_64BIT ? 8 : 4))
2678 align = (TARGET_64BIT ? 8 : 4);
2680 /* Note that we know each loop below will execute at least twice
2681 (else we would have open-coded the copy). */
2685 /* Pre-adjust the loop counter. */
2686 operands[2] = GEN_INT (n_bytes - 16);
2687 output_asm_insn ("ldi %2,%1", operands);
2690 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2691 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2692 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2694 /* Handle the residual. There could be up to 7 bytes of
2695 residual to copy! */
2696 if (n_bytes % 16 != 0)
2698 operands[2] = GEN_INT (n_bytes % 8);
2699 if (n_bytes % 16 >= 8)
2700 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2701 if (n_bytes % 8 != 0)
2702 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2707 /* Pre-adjust the loop counter. */
2708 operands[2] = GEN_INT (n_bytes - 8);
2709 output_asm_insn ("ldi %2,%1", operands);
2712 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2713 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2714 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2716 /* Handle the residual. There could be up to 7 bytes of
2717 residual to copy! */
2718 if (n_bytes % 8 != 0)
2720 operands[2] = GEN_INT (n_bytes % 4);
2721 if (n_bytes % 8 >= 4)
2722 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2723 if (n_bytes % 4 != 0)
2724 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2729 /* Pre-adjust the loop counter. */
2730 operands[2] = GEN_INT (n_bytes - 4);
2731 output_asm_insn ("ldi %2,%1", operands);
2734 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2735 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2736 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2738 /* Handle the residual. */
2739 if (n_bytes % 4 != 0)
2741 if (n_bytes % 4 >= 2)
2742 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2743 if (n_bytes % 2 != 0)
2744 output_asm_insn ("stb %%r0,0(%0)", operands);
2749 /* Pre-adjust the loop counter. */
2750 operands[2] = GEN_INT (n_bytes - 2);
2751 output_asm_insn ("ldi %2,%1", operands);
2754 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2755 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2756 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2758 /* Handle the residual. */
2759 if (n_bytes % 2 != 0)
2760 output_asm_insn ("stb %%r0,0(%0)", operands);
2769 /* Count the number of insns necessary to handle this block move.
2771 Basic structure is the same as emit_block_move, except that we
2772 count insns rather than emit them. */
2775 compute_clrmem_length (rtx insn)
2777 rtx pat = PATTERN (insn);
2778 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2779 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2780 unsigned int n_insns = 0;
2782 /* We can't clear more than a word at a time because the PA
2783 has no longer integer move insns. */
2784 if (align > (TARGET_64BIT ? 8 : 4))
2785 align = (TARGET_64BIT ? 8 : 4);
2787 /* The basic loop. */
2791 if (n_bytes % (2 * align) != 0)
2793 if ((n_bytes % (2 * align)) >= align)
2796 if ((n_bytes % align) != 0)
2800 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2806 output_and (rtx *operands)
2808 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2810 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2811 int ls0, ls1, ms0, p, len;
2813 for (ls0 = 0; ls0 < 32; ls0++)
2814 if ((mask & (1 << ls0)) == 0)
2817 for (ls1 = ls0; ls1 < 32; ls1++)
2818 if ((mask & (1 << ls1)) != 0)
2821 for (ms0 = ls1; ms0 < 32; ms0++)
2822 if ((mask & (1 << ms0)) == 0)
2825 gcc_assert (ms0 == 32);
2833 operands[2] = GEN_INT (len);
2834 return "{extru|extrw,u} %1,31,%2,%0";
2838 /* We could use this `depi' for the case above as well, but `depi'
2839 requires one more register file access than an `extru'. */
2844 operands[2] = GEN_INT (p);
2845 operands[3] = GEN_INT (len);
2846 return "{depi|depwi} 0,%2,%3,%0";
2850 return "and %1,%2,%0";
2853 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2854 storing the result in operands[0]. */
2856 output_64bit_and (rtx *operands)
2858 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2860 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2861 int ls0, ls1, ms0, p, len;
2863 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2864 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2867 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2868 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2871 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2872 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2875 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2877 if (ls1 == HOST_BITS_PER_WIDE_INT)
2883 operands[2] = GEN_INT (len);
2884 return "extrd,u %1,63,%2,%0";
2888 /* We could use this `depi' for the case above as well, but `depi'
2889 requires one more register file access than an `extru'. */
2894 operands[2] = GEN_INT (p);
2895 operands[3] = GEN_INT (len);
2896 return "depdi 0,%2,%3,%0";
2900 return "and %1,%2,%0";
2904 output_ior (rtx *operands)
2906 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2907 int bs0, bs1, p, len;
2909 if (INTVAL (operands[2]) == 0)
2910 return "copy %1,%0";
2912 for (bs0 = 0; bs0 < 32; bs0++)
2913 if ((mask & (1 << bs0)) != 0)
2916 for (bs1 = bs0; bs1 < 32; bs1++)
2917 if ((mask & (1 << bs1)) == 0)
2920 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2925 operands[2] = GEN_INT (p);
2926 operands[3] = GEN_INT (len);
2927 return "{depi|depwi} -1,%2,%3,%0";
2930 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2931 storing the result in operands[0]. */
2933 output_64bit_ior (rtx *operands)
2935 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2936 int bs0, bs1, p, len;
2938 if (INTVAL (operands[2]) == 0)
2939 return "copy %1,%0";
2941 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2942 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2945 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2946 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2949 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
2950 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2955 operands[2] = GEN_INT (p);
2956 operands[3] = GEN_INT (len);
2957 return "depdi -1,%2,%3,%0";
2960 /* Target hook for assembling integer objects. This code handles
2961 aligned SI and DI integers specially since function references
2962 must be preceded by P%. */
2965 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
2967 if (size == UNITS_PER_WORD
2969 && function_label_operand (x, VOIDmode))
2971 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2972 output_addr_const (asm_out_file, x);
2973 fputc ('\n', asm_out_file);
2976 return default_assemble_integer (x, size, aligned_p);
2979 /* Output an ascii string. */
2981 output_ascii (FILE *file, const char *p, int size)
2985 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2987 /* The HP assembler can only take strings of 256 characters at one
2988 time. This is a limitation on input line length, *not* the
2989 length of the string. Sigh. Even worse, it seems that the
2990 restriction is in number of input characters (see \xnn &
2991 \whatever). So we have to do this very carefully. */
2993 fputs ("\t.STRING \"", file);
2996 for (i = 0; i < size; i += 4)
3000 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3002 register unsigned int c = (unsigned char) p[i + io];
3004 if (c == '\"' || c == '\\')
3005 partial_output[co++] = '\\';
3006 if (c >= ' ' && c < 0177)
3007 partial_output[co++] = c;
3011 partial_output[co++] = '\\';
3012 partial_output[co++] = 'x';
3013 hexd = c / 16 - 0 + '0';
3015 hexd -= '9' - 'a' + 1;
3016 partial_output[co++] = hexd;
3017 hexd = c % 16 - 0 + '0';
3019 hexd -= '9' - 'a' + 1;
3020 partial_output[co++] = hexd;
3023 if (chars_output + co > 243)
3025 fputs ("\"\n\t.STRING \"", file);
3028 fwrite (partial_output, 1, (size_t) co, file);
3032 fputs ("\"\n", file);
3035 /* Try to rewrite floating point comparisons & branches to avoid
3036 useless add,tr insns.
3038 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3039 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3040 first attempt to remove useless add,tr insns. It is zero
3041 for the second pass as reorg sometimes leaves bogus REG_DEAD
3044 When CHECK_NOTES is zero we can only eliminate add,tr insns
3045 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3048 remove_useless_addtr_insns (int check_notes)
3051 static int pass = 0;
3053 /* This is fairly cheap, so always run it when optimizing. */
3057 int fbranch_count = 0;
3059 /* Walk all the insns in this function looking for fcmp & fbranch
3060 instructions. Keep track of how many of each we find. */
3061 for (insn = get_insns (); insn; insn = next_insn (insn))
3065 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3066 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3069 tmp = PATTERN (insn);
3071 /* It must be a set. */
3072 if (GET_CODE (tmp) != SET)
3075 /* If the destination is CCFP, then we've found an fcmp insn. */
3076 tmp = SET_DEST (tmp);
3077 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3083 tmp = PATTERN (insn);
3084 /* If this is an fbranch instruction, bump the fbranch counter. */
3085 if (GET_CODE (tmp) == SET
3086 && SET_DEST (tmp) == pc_rtx
3087 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3088 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3089 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3090 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3098 /* Find all floating point compare + branch insns. If possible,
3099 reverse the comparison & the branch to avoid add,tr insns. */
3100 for (insn = get_insns (); insn; insn = next_insn (insn))
3104 /* Ignore anything that isn't an INSN. */
3105 if (GET_CODE (insn) != INSN)
3108 tmp = PATTERN (insn);
3110 /* It must be a set. */
3111 if (GET_CODE (tmp) != SET)
3114 /* The destination must be CCFP, which is register zero. */
3115 tmp = SET_DEST (tmp);
3116 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3119 /* INSN should be a set of CCFP.
3121 See if the result of this insn is used in a reversed FP
3122 conditional branch. If so, reverse our condition and
3123 the branch. Doing so avoids useless add,tr insns. */
3124 next = next_insn (insn);
3127 /* Jumps, calls and labels stop our search. */
3128 if (GET_CODE (next) == JUMP_INSN
3129 || GET_CODE (next) == CALL_INSN
3130 || GET_CODE (next) == CODE_LABEL)
3133 /* As does another fcmp insn. */
3134 if (GET_CODE (next) == INSN
3135 && GET_CODE (PATTERN (next)) == SET
3136 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3137 && REGNO (SET_DEST (PATTERN (next))) == 0)
3140 next = next_insn (next);
3143 /* Is NEXT_INSN a branch? */
3145 && GET_CODE (next) == JUMP_INSN)
3147 rtx pattern = PATTERN (next);
3149 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3150 and CCFP dies, then reverse our conditional and the branch
3151 to avoid the add,tr. */
3152 if (GET_CODE (pattern) == SET
3153 && SET_DEST (pattern) == pc_rtx
3154 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3155 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3156 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3157 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3158 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3159 && (fcmp_count == fbranch_count
3161 && find_regno_note (next, REG_DEAD, 0))))
3163 /* Reverse the branch. */
3164 tmp = XEXP (SET_SRC (pattern), 1);
3165 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3166 XEXP (SET_SRC (pattern), 2) = tmp;
3167 INSN_CODE (next) = -1;
3169 /* Reverse our condition. */
3170 tmp = PATTERN (insn);
3171 PUT_CODE (XEXP (tmp, 1),
3172 (reverse_condition_maybe_unordered
3173 (GET_CODE (XEXP (tmp, 1)))));
3183 /* You may have trouble believing this, but this is the 32 bit HP-PA
3188 Variable arguments (optional; any number may be allocated)
3190 SP-(4*(N+9)) arg word N
3195 Fixed arguments (must be allocated; may remain unused)
3204 SP-32 External Data Pointer (DP)
3206 SP-24 External/stub RP (RP')
3210 SP-8 Calling Stub RP (RP'')
3215 SP-0 Stack Pointer (points to next available address)
3219 /* This function saves registers as follows. Registers marked with ' are
3220 this function's registers (as opposed to the previous function's).
3221 If a frame_pointer isn't needed, r4 is saved as a general register;
3222 the space for the frame pointer is still allocated, though, to keep
3228 SP (FP') Previous FP
3229 SP + 4 Alignment filler (sigh)
3230 SP + 8 Space for locals reserved here.
3234 SP + n All call saved register used.
3238 SP + o All call saved fp registers used.
3242 SP + p (SP') points to next available address.
3246 /* Global variables set by output_function_prologue(). */
3247 /* Size of frame. Need to know this to emit return insns from
3249 static HOST_WIDE_INT actual_fsize, local_fsize;
3250 static int save_fregs;
3252 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3253 Handle case where DISP > 8k by using the add_high_const patterns.
3255 Note in DISP > 8k case, we will leave the high part of the address
3256 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3259 store_reg (int reg, HOST_WIDE_INT disp, int base)
3261 rtx insn, dest, src, basereg;
3263 src = gen_rtx_REG (word_mode, reg);
3264 basereg = gen_rtx_REG (Pmode, base);
3265 if (VAL_14_BITS_P (disp))
3267 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3268 insn = emit_move_insn (dest, src);
3270 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3272 rtx delta = GEN_INT (disp);
3273 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3275 emit_move_insn (tmpreg, delta);
3276 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3280 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3281 gen_rtx_SET (VOIDmode, tmpreg,
3282 gen_rtx_PLUS (Pmode, basereg, delta)),
3284 RTX_FRAME_RELATED_P (insn) = 1;
3286 dest = gen_rtx_MEM (word_mode, tmpreg);
3287 insn = emit_move_insn (dest, src);
3291 rtx delta = GEN_INT (disp);
3292 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3293 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3295 emit_move_insn (tmpreg, high);
3296 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3297 insn = emit_move_insn (dest, src);
3301 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3302 gen_rtx_SET (VOIDmode,
3303 gen_rtx_MEM (word_mode,
3304 gen_rtx_PLUS (word_mode, basereg,
3312 RTX_FRAME_RELATED_P (insn) = 1;
3315 /* Emit RTL to store REG at the memory location specified by BASE and then
3316 add MOD to BASE. MOD must be <= 8k. */
3319 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3321 rtx insn, basereg, srcreg, delta;
3323 gcc_assert (VAL_14_BITS_P (mod));
3325 basereg = gen_rtx_REG (Pmode, base);
3326 srcreg = gen_rtx_REG (word_mode, reg);
3327 delta = GEN_INT (mod);
3329 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3332 RTX_FRAME_RELATED_P (insn) = 1;
3334 /* RTX_FRAME_RELATED_P must be set on each frame related set
3335 in a parallel with more than one element. */
3336 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3337 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3341 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3342 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3343 whether to add a frame note or not.
3345 In the DISP > 8k case, we leave the high part of the address in %r1.
3346 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3349 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3353 if (VAL_14_BITS_P (disp))
3355 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3356 plus_constant (gen_rtx_REG (Pmode, base), disp));
3358 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3360 rtx basereg = gen_rtx_REG (Pmode, base);
3361 rtx delta = GEN_INT (disp);
3362 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3364 emit_move_insn (tmpreg, delta);
3365 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3366 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3369 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3370 gen_rtx_SET (VOIDmode, tmpreg,
3371 gen_rtx_PLUS (Pmode, basereg, delta)),
3376 rtx basereg = gen_rtx_REG (Pmode, base);
3377 rtx delta = GEN_INT (disp);
3378 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3380 emit_move_insn (tmpreg,
3381 gen_rtx_PLUS (Pmode, basereg,
3382 gen_rtx_HIGH (Pmode, delta)));
3383 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3384 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3387 if (DO_FRAME_NOTES && note)
3388 RTX_FRAME_RELATED_P (insn) = 1;
3392 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3397 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3398 be consistent with the rounding and size calculation done here.
3399 Change them at the same time. */
3401 /* We do our own stack alignment. First, round the size of the
3402 stack locals up to a word boundary. */
3403 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3405 /* Space for previous frame pointer + filler. If any frame is
3406 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3407 waste some space here for the sake of HP compatibility. The
3408 first slot is only used when the frame pointer is needed. */
3409 if (size || frame_pointer_needed)
3410 size += STARTING_FRAME_OFFSET;
3412 /* If the current function calls __builtin_eh_return, then we need
3413 to allocate stack space for registers that will hold data for
3414 the exception handler. */
3415 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3419 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3421 size += i * UNITS_PER_WORD;
3424 /* Account for space used by the callee general register saves. */
3425 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3426 if (regs_ever_live[i])
3427 size += UNITS_PER_WORD;
3429 /* Account for space used by the callee floating point register saves. */
3430 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3431 if (regs_ever_live[i]
3432 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3436 /* We always save both halves of the FP register, so always
3437 increment the frame size by 8 bytes. */
3441 /* If any of the floating registers are saved, account for the
3442 alignment needed for the floating point register save block. */
3445 size = (size + 7) & ~7;
3450 /* The various ABIs include space for the outgoing parameters in the
3451 size of the current function's stack frame. We don't need to align
3452 for the outgoing arguments as their alignment is set by the final
3453 rounding for the frame as a whole. */
3454 size += current_function_outgoing_args_size;
3456 /* Allocate space for the fixed frame marker. This space must be
3457 allocated for any function that makes calls or allocates
3459 if (!current_function_is_leaf || size)
3460 size += TARGET_64BIT ? 48 : 32;
3462 /* Finally, round to the preferred stack boundary. */
3463 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3464 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3467 /* Generate the assembly code for function entry. FILE is a stdio
3468 stream to output the code to. SIZE is an int: how many units of
3469 temporary storage to allocate.
3471 Refer to the array `regs_ever_live' to determine which registers to
3472 save; `regs_ever_live[I]' is nonzero if register number I is ever
3473 used in the function. This function is responsible for knowing
3474 which registers should not be saved even if used. */
3476 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3477 of memory. If any fpu reg is used in the function, we allocate
3478 such a block here, at the bottom of the frame, just in case it's needed.
3480 If this function is a leaf procedure, then we may choose not
3481 to do a "save" insn. The decision about whether or not
3482 to do this is made in regclass.c. */
3485 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3487 /* The function's label and associated .PROC must never be
3488 separated and must be output *after* any profiling declarations
3489 to avoid changing spaces/subspaces within a procedure. */
3490 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3491 fputs ("\t.PROC\n", file);
3493 /* hppa_expand_prologue does the dirty work now. We just need
3494 to output the assembler directives which denote the start
3496 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3497 if (regs_ever_live[2])
3498 fputs (",CALLS,SAVE_RP", file);
3500 fputs (",NO_CALLS", file);
3502 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3503 at the beginning of the frame and that it is used as the frame
3504 pointer for the frame. We do this because our current frame
3505 layout doesn't conform to that specified in the HP runtime
3506 documentation and we need a way to indicate to programs such as
3507 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3508 isn't used by HP compilers but is supported by the assembler.
3509 However, SAVE_SP is supposed to indicate that the previous stack
3510 pointer has been saved in the frame marker. */
3511 if (frame_pointer_needed)
3512 fputs (",SAVE_SP", file);
3514 /* Pass on information about the number of callee register saves
3515 performed in the prologue.
3517 The compiler is supposed to pass the highest register number
3518 saved, the assembler then has to adjust that number before
3519 entering it into the unwind descriptor (to account for any
3520 caller saved registers with lower register numbers than the
3521 first callee saved register). */
3523 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3526 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3528 fputs ("\n\t.ENTRY\n", file);
3530 remove_useless_addtr_insns (0);
3534 hppa_expand_prologue (void)
3536 int merge_sp_adjust_with_store = 0;
3537 HOST_WIDE_INT size = get_frame_size ();
3538 HOST_WIDE_INT offset;
3546 /* Compute total size for frame pointer, filler, locals and rounding to
3547 the next word boundary. Similar code appears in compute_frame_size
3548 and must be changed in tandem with this code. */
3549 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3550 if (local_fsize || frame_pointer_needed)
3551 local_fsize += STARTING_FRAME_OFFSET;
3553 actual_fsize = compute_frame_size (size, &save_fregs);
3555 /* Compute a few things we will use often. */
3556 tmpreg = gen_rtx_REG (word_mode, 1);
3558 /* Save RP first. The calling conventions manual states RP will
3559 always be stored into the caller's frame at sp - 20 or sp - 16
3560 depending on which ABI is in use. */
3561 if (regs_ever_live[2] || current_function_calls_eh_return)
3562 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3564 /* Allocate the local frame and set up the frame pointer if needed. */
3565 if (actual_fsize != 0)
3567 if (frame_pointer_needed)
3569 /* Copy the old frame pointer temporarily into %r1. Set up the
3570 new stack pointer, then store away the saved old frame pointer
3571 into the stack at sp and at the same time update the stack
3572 pointer by actual_fsize bytes. Two versions, first
3573 handles small (<8k) frames. The second handles large (>=8k)
3575 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3577 RTX_FRAME_RELATED_P (insn) = 1;
3579 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3581 RTX_FRAME_RELATED_P (insn) = 1;
3583 if (VAL_14_BITS_P (actual_fsize))
3584 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3587 /* It is incorrect to store the saved frame pointer at *sp,
3588 then increment sp (writes beyond the current stack boundary).
3590 So instead use stwm to store at *sp and post-increment the
3591 stack pointer as an atomic operation. Then increment sp to
3592 finish allocating the new frame. */
3593 HOST_WIDE_INT adjust1 = 8192 - 64;
3594 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3596 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3597 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3601 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3602 we need to store the previous stack pointer (frame pointer)
3603 into the frame marker on targets that use the HP unwind
3604 library. This allows the HP unwind library to be used to
3605 unwind GCC frames. However, we are not fully compatible
3606 with the HP library because our frame layout differs from
3607 that specified in the HP runtime specification.
3609 We don't want a frame note on this instruction as the frame
3610 marker moves during dynamic stack allocation.
3612 This instruction also serves as a blockage to prevent
3613 register spills from being scheduled before the stack
3614 pointer is raised. This is necessary as we store
3615 registers using the frame pointer as a base register,
3616 and the frame pointer is set before sp is raised. */
3617 if (TARGET_HPUX_UNWIND_LIBRARY)
3619 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3620 GEN_INT (TARGET_64BIT ? -8 : -4));
3622 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3626 emit_insn (gen_blockage ());
3628 /* no frame pointer needed. */
3631 /* In some cases we can perform the first callee register save
3632 and allocating the stack frame at the same time. If so, just
3633 make a note of it and defer allocating the frame until saving
3634 the callee registers. */
3635 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3636 merge_sp_adjust_with_store = 1;
3637 /* Can not optimize. Adjust the stack frame by actual_fsize
3640 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3645 /* Normal register save.
3647 Do not save the frame pointer in the frame_pointer_needed case. It
3648 was done earlier. */
3649 if (frame_pointer_needed)
3651 offset = local_fsize;
3653 /* Saving the EH return data registers in the frame is the simplest
3654 way to get the frame unwind information emitted. We put them
3655 just before the general registers. */
3656 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3658 unsigned int i, regno;
3662 regno = EH_RETURN_DATA_REGNO (i);
3663 if (regno == INVALID_REGNUM)
3666 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3667 offset += UNITS_PER_WORD;
3671 for (i = 18; i >= 4; i--)
3672 if (regs_ever_live[i] && ! call_used_regs[i])
3674 store_reg (i, offset, FRAME_POINTER_REGNUM);
3675 offset += UNITS_PER_WORD;
3678 /* Account for %r3 which is saved in a special place. */
3681 /* No frame pointer needed. */
3684 offset = local_fsize - actual_fsize;
3686 /* Saving the EH return data registers in the frame is the simplest
3687 way to get the frame unwind information emitted. */
3688 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3690 unsigned int i, regno;
3694 regno = EH_RETURN_DATA_REGNO (i);
3695 if (regno == INVALID_REGNUM)
3698 /* If merge_sp_adjust_with_store is nonzero, then we can
3699 optimize the first save. */
3700 if (merge_sp_adjust_with_store)
3702 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3703 merge_sp_adjust_with_store = 0;
3706 store_reg (regno, offset, STACK_POINTER_REGNUM);
3707 offset += UNITS_PER_WORD;
3711 for (i = 18; i >= 3; i--)
3712 if (regs_ever_live[i] && ! call_used_regs[i])
3714 /* If merge_sp_adjust_with_store is nonzero, then we can
3715 optimize the first GR save. */
3716 if (merge_sp_adjust_with_store)
3718 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3719 merge_sp_adjust_with_store = 0;
3722 store_reg (i, offset, STACK_POINTER_REGNUM);
3723 offset += UNITS_PER_WORD;
3727 /* If we wanted to merge the SP adjustment with a GR save, but we never
3728 did any GR saves, then just emit the adjustment here. */
3729 if (merge_sp_adjust_with_store)
3730 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3734 /* The hppa calling conventions say that %r19, the pic offset
3735 register, is saved at sp - 32 (in this function's frame)
3736 when generating PIC code. FIXME: What is the correct thing
3737 to do for functions which make no calls and allocate no
3738 frame? Do we need to allocate a frame, or can we just omit
3739 the save? For now we'll just omit the save.
3741 We don't want a note on this insn as the frame marker can
3742 move if there is a dynamic stack allocation. */
3743 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3745 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3747 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3751 /* Align pointer properly (doubleword boundary). */
3752 offset = (offset + 7) & ~7;
3754 /* Floating point register store. */
3759 /* First get the frame or stack pointer to the start of the FP register
3761 if (frame_pointer_needed)
3763 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3764 base = frame_pointer_rtx;
3768 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3769 base = stack_pointer_rtx;
3772 /* Now actually save the FP registers. */
3773 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3775 if (regs_ever_live[i]
3776 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3778 rtx addr, insn, reg;
3779 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3780 reg = gen_rtx_REG (DFmode, i);
3781 insn = emit_move_insn (addr, reg);
3784 RTX_FRAME_RELATED_P (insn) = 1;
3787 rtx mem = gen_rtx_MEM (DFmode,
3788 plus_constant (base, offset));
3790 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3791 gen_rtx_SET (VOIDmode, mem, reg),
3796 rtx meml = gen_rtx_MEM (SFmode,
3797 plus_constant (base, offset));
3798 rtx memr = gen_rtx_MEM (SFmode,
3799 plus_constant (base, offset + 4));
3800 rtx regl = gen_rtx_REG (SFmode, i);
3801 rtx regr = gen_rtx_REG (SFmode, i + 1);
3802 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3803 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3806 RTX_FRAME_RELATED_P (setl) = 1;
3807 RTX_FRAME_RELATED_P (setr) = 1;
3808 vec = gen_rtvec (2, setl, setr);
3810 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3811 gen_rtx_SEQUENCE (VOIDmode, vec),
3815 offset += GET_MODE_SIZE (DFmode);
3822 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3823 Handle case where DISP > 8k by using the add_high_const patterns. */
3826 load_reg (int reg, HOST_WIDE_INT disp, int base)
3828 rtx dest = gen_rtx_REG (word_mode, reg);
3829 rtx basereg = gen_rtx_REG (Pmode, base);
3832 if (VAL_14_BITS_P (disp))
3833 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3834 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3836 rtx delta = GEN_INT (disp);
3837 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3839 emit_move_insn (tmpreg, delta);
3840 if (TARGET_DISABLE_INDEXING)
3842 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3843 src = gen_rtx_MEM (word_mode, tmpreg);
3846 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3850 rtx delta = GEN_INT (disp);
3851 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3852 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3854 emit_move_insn (tmpreg, high);
3855 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3858 emit_move_insn (dest, src);
3861 /* Update the total code bytes output to the text section. */
3864 update_total_code_bytes (int nbytes)
3866 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3867 && !IN_NAMED_SECTION_P (cfun->decl))
3869 if (INSN_ADDRESSES_SET_P ())
3871 unsigned long old_total = total_code_bytes;
3873 total_code_bytes += nbytes;
3875 /* Be prepared to handle overflows. */
3876 if (old_total > total_code_bytes)
3877 total_code_bytes = -1;
3880 total_code_bytes = -1;
3884 /* This function generates the assembly code for function exit.
3885 Args are as for output_function_prologue ().
3887 The function epilogue should not depend on the current stack
3888 pointer! It should use the frame pointer only. This is mandatory
3889 because of alloca; we also take advantage of it to omit stack
3890 adjustments before returning. */
3893 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3895 rtx insn = get_last_insn ();
3899 /* hppa_expand_epilogue does the dirty work now. We just need
3900 to output the assembler directives which denote the end
3903 To make debuggers happy, emit a nop if the epilogue was completely
3904 eliminated due to a volatile call as the last insn in the
3905 current function. That way the return address (in %r2) will
3906 always point to a valid instruction in the current function. */
3908 /* Get the last real insn. */
3909 if (GET_CODE (insn) == NOTE)
3910 insn = prev_real_insn (insn);
3912 /* If it is a sequence, then look inside. */
3913 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3914 insn = XVECEXP (PATTERN (insn), 0, 0);
3916 /* If insn is a CALL_INSN, then it must be a call to a volatile
3917 function (otherwise there would be epilogue insns). */
3918 if (insn && GET_CODE (insn) == CALL_INSN)
3920 fputs ("\tnop\n", file);
3924 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3926 if (TARGET_SOM && TARGET_GAS)
3928 /* We done with this subspace except possibly for some additional
3929 debug information. Forget that we are in this subspace to ensure
3930 that the next function is output in its own subspace. */
3934 if (INSN_ADDRESSES_SET_P ())
3936 insn = get_last_nonnote_insn ();
3937 last_address += INSN_ADDRESSES (INSN_UID (insn));
3939 last_address += insn_default_length (insn);
3940 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
3941 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
3944 /* Finally, update the total number of code bytes output so far. */
3945 update_total_code_bytes (last_address);
3949 hppa_expand_epilogue (void)
3952 HOST_WIDE_INT offset;
3953 HOST_WIDE_INT ret_off = 0;
3955 int merge_sp_adjust_with_load = 0;
3957 /* We will use this often. */
3958 tmpreg = gen_rtx_REG (word_mode, 1);
3960 /* Try to restore RP early to avoid load/use interlocks when
3961 RP gets used in the return (bv) instruction. This appears to still
3962 be necessary even when we schedule the prologue and epilogue. */
3963 if (regs_ever_live [2] || current_function_calls_eh_return)
3965 ret_off = TARGET_64BIT ? -16 : -20;
3966 if (frame_pointer_needed)
3968 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3973 /* No frame pointer, and stack is smaller than 8k. */
3974 if (VAL_14_BITS_P (ret_off - actual_fsize))
3976 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3982 /* General register restores. */
3983 if (frame_pointer_needed)
3985 offset = local_fsize;
3987 /* If the current function calls __builtin_eh_return, then we need
3988 to restore the saved EH data registers. */
3989 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3991 unsigned int i, regno;
3995 regno = EH_RETURN_DATA_REGNO (i);
3996 if (regno == INVALID_REGNUM)
3999 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4000 offset += UNITS_PER_WORD;
4004 for (i = 18; i >= 4; i--)
4005 if (regs_ever_live[i] && ! call_used_regs[i])
4007 load_reg (i, offset, FRAME_POINTER_REGNUM);
4008 offset += UNITS_PER_WORD;
4013 offset = local_fsize - actual_fsize;
4015 /* If the current function calls __builtin_eh_return, then we need
4016 to restore the saved EH data registers. */
4017 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4019 unsigned int i, regno;
4023 regno = EH_RETURN_DATA_REGNO (i);
4024 if (regno == INVALID_REGNUM)
4027 /* Only for the first load.
4028 merge_sp_adjust_with_load holds the register load
4029 with which we will merge the sp adjustment. */
4030 if (merge_sp_adjust_with_load == 0
4032 && VAL_14_BITS_P (-actual_fsize))
4033 merge_sp_adjust_with_load = regno;
4035 load_reg (regno, offset, STACK_POINTER_REGNUM);
4036 offset += UNITS_PER_WORD;
4040 for (i = 18; i >= 3; i--)
4042 if (regs_ever_live[i] && ! call_used_regs[i])
4044 /* Only for the first load.
4045 merge_sp_adjust_with_load holds the register load
4046 with which we will merge the sp adjustment. */
4047 if (merge_sp_adjust_with_load == 0
4049 && VAL_14_BITS_P (-actual_fsize))
4050 merge_sp_adjust_with_load = i;
4052 load_reg (i, offset, STACK_POINTER_REGNUM);
4053 offset += UNITS_PER_WORD;
4058 /* Align pointer properly (doubleword boundary). */
4059 offset = (offset + 7) & ~7;
4061 /* FP register restores. */
4064 /* Adjust the register to index off of. */
4065 if (frame_pointer_needed)
4066 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4068 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4070 /* Actually do the restores now. */
4071 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4072 if (regs_ever_live[i]
4073 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4075 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4076 rtx dest = gen_rtx_REG (DFmode, i);
4077 emit_move_insn (dest, src);
4081 /* Emit a blockage insn here to keep these insns from being moved to
4082 an earlier spot in the epilogue, or into the main instruction stream.
4084 This is necessary as we must not cut the stack back before all the
4085 restores are finished. */
4086 emit_insn (gen_blockage ());
4088 /* Reset stack pointer (and possibly frame pointer). The stack
4089 pointer is initially set to fp + 64 to avoid a race condition. */
4090 if (frame_pointer_needed)
4092 rtx delta = GEN_INT (-64);
4094 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4095 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4097 /* If we were deferring a callee register restore, do it now. */
4098 else if (merge_sp_adjust_with_load)
4100 rtx delta = GEN_INT (-actual_fsize);
4101 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4103 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4105 else if (actual_fsize != 0)
4106 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4109 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4110 frame greater than 8k), do so now. */
4112 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4114 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4116 rtx sa = EH_RETURN_STACKADJ_RTX;
4118 emit_insn (gen_blockage ());
4119 emit_insn (TARGET_64BIT
4120 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4121 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4126 hppa_pic_save_rtx (void)
4128 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4131 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4132 #define NO_DEFERRED_PROFILE_COUNTERS 0
4135 /* Define heap vector type for funcdef numbers. */
4137 DEF_VEC_ALLOC_I(int,heap);
4139 /* Vector of funcdef numbers. */
4140 static VEC(int,heap) *funcdef_nos;
4142 /* Output deferred profile counters. */
4144 output_deferred_profile_counters (void)
4149 if (VEC_empty (int, funcdef_nos))
4153 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4154 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4156 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4158 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4159 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4162 VEC_free (int, heap, funcdef_nos);
4166 hppa_profile_hook (int label_no)
4168 /* We use SImode for the address of the function in both 32 and
4169 64-bit code to avoid having to provide DImode versions of the
4170 lcla2 and load_offset_label_address insn patterns. */
4171 rtx reg = gen_reg_rtx (SImode);
4172 rtx label_rtx = gen_label_rtx ();
4173 rtx begin_label_rtx, call_insn;
4174 char begin_label_name[16];
4176 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4178 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4181 emit_move_insn (arg_pointer_rtx,
4182 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4185 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4187 /* The address of the function is loaded into %r25 with a instruction-
4188 relative sequence that avoids the use of relocations. The sequence
4189 is split so that the load_offset_label_address instruction can
4190 occupy the delay slot of the call to _mcount. */
4192 emit_insn (gen_lcla2 (reg, label_rtx));
4194 emit_insn (gen_lcla1 (reg, label_rtx));
4196 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4197 reg, begin_label_rtx, label_rtx));
4199 #if !NO_DEFERRED_PROFILE_COUNTERS
4201 rtx count_label_rtx, addr, r24;
4202 char count_label_name[16];
4204 VEC_safe_push (int, heap, funcdef_nos, label_no);
4205 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4206 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4208 addr = force_reg (Pmode, count_label_rtx);
4209 r24 = gen_rtx_REG (Pmode, 24);
4210 emit_move_insn (r24, addr);
4213 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4214 gen_rtx_SYMBOL_REF (Pmode,
4216 GEN_INT (TARGET_64BIT ? 24 : 12)));
4218 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4223 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4224 gen_rtx_SYMBOL_REF (Pmode,
4226 GEN_INT (TARGET_64BIT ? 16 : 8)));
4230 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4231 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4233 /* Indicate the _mcount call cannot throw, nor will it execute a
4235 REG_NOTES (call_insn)
4236 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4239 /* Fetch the return address for the frame COUNT steps up from
4240 the current frame, after the prologue. FRAMEADDR is the
4241 frame pointer of the COUNT frame.
4243 We want to ignore any export stub remnants here. To handle this,
4244 we examine the code at the return address, and if it is an export
4245 stub, we return a memory rtx for the stub return address stored
4248 The value returned is used in two different ways:
4250 1. To find a function's caller.
4252 2. To change the return address for a function.
4254 This function handles most instances of case 1; however, it will
4255 fail if there are two levels of stubs to execute on the return
4256 path. The only way I believe that can happen is if the return value
4257 needs a parameter relocation, which never happens for C code.
4259 This function handles most instances of case 2; however, it will
4260 fail if we did not originally have stub code on the return path
4261 but will need stub code on the new return path. This can happen if
4262 the caller & callee are both in the main program, but the new
4263 return location is in a shared library. */
4266 return_addr_rtx (int count, rtx frameaddr)
4276 rp = get_hard_reg_initial_val (Pmode, 2);
4278 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4281 saved_rp = gen_reg_rtx (Pmode);
4282 emit_move_insn (saved_rp, rp);
4284 /* Get pointer to the instruction stream. We have to mask out the
4285 privilege level from the two low order bits of the return address
4286 pointer here so that ins will point to the start of the first
4287 instruction that would have been executed if we returned. */
4288 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4289 label = gen_label_rtx ();
4291 /* Check the instruction stream at the normal return address for the
4294 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4295 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4296 0x00011820 | stub+16: mtsp r1,sr0
4297 0xe0400002 | stub+20: be,n 0(sr0,rp)
4299 If it is an export stub, than our return address is really in
4302 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4303 NULL_RTX, SImode, 1);
4304 emit_jump_insn (gen_bne (label));
4306 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4307 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4308 emit_jump_insn (gen_bne (label));
4310 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4311 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4312 emit_jump_insn (gen_bne (label));
4314 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4315 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4317 /* If there is no export stub then just use the value saved from
4318 the return pointer register. */
4320 emit_jump_insn (gen_bne (label));
4322 /* Here we know that our return address points to an export
4323 stub. We don't want to return the address of the export stub,
4324 but rather the return address of the export stub. That return
4325 address is stored at -24[frameaddr]. */
4327 emit_move_insn (saved_rp,
4329 memory_address (Pmode,
4330 plus_constant (frameaddr,
4337 /* This is only valid once reload has completed because it depends on
4338 knowing exactly how much (if any) frame there is and...
4340 It's only valid if there is no frame marker to de-allocate and...
4342 It's only valid if %r2 hasn't been saved into the caller's frame
4343 (we're not profiling and %r2 isn't live anywhere). */
4345 hppa_can_use_return_insn_p (void)
4347 return (reload_completed
4348 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4349 && ! regs_ever_live[2]
4350 && ! frame_pointer_needed);
4354 emit_bcond_fp (enum rtx_code code, rtx operand0)
4356 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4357 gen_rtx_IF_THEN_ELSE (VOIDmode,
4358 gen_rtx_fmt_ee (code,
4360 gen_rtx_REG (CCFPmode, 0),
4362 gen_rtx_LABEL_REF (VOIDmode, operand0),
4368 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4370 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4371 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4374 /* Adjust the cost of a scheduling dependency. Return the new cost of
4375 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4378 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4380 enum attr_type attr_type;
4382 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4383 true dependencies as they are described with bypasses now. */
4384 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4387 if (! recog_memoized (insn))
4390 attr_type = get_attr_type (insn);
4392 switch (REG_NOTE_KIND (link))
4395 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4398 if (attr_type == TYPE_FPLOAD)
4400 rtx pat = PATTERN (insn);
4401 rtx dep_pat = PATTERN (dep_insn);
4402 if (GET_CODE (pat) == PARALLEL)
4404 /* This happens for the fldXs,mb patterns. */
4405 pat = XVECEXP (pat, 0, 0);
4407 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4408 /* If this happens, we have to extend this to schedule
4409 optimally. Return 0 for now. */
4412 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4414 if (! recog_memoized (dep_insn))
4416 switch (get_attr_type (dep_insn))
4423 case TYPE_FPSQRTSGL:
4424 case TYPE_FPSQRTDBL:
4425 /* A fpload can't be issued until one cycle before a
4426 preceding arithmetic operation has finished if
4427 the target of the fpload is any of the sources
4428 (or destination) of the arithmetic operation. */
4429 return insn_default_latency (dep_insn) - 1;
4436 else if (attr_type == TYPE_FPALU)
4438 rtx pat = PATTERN (insn);
4439 rtx dep_pat = PATTERN (dep_insn);
4440 if (GET_CODE (pat) == PARALLEL)
4442 /* This happens for the fldXs,mb patterns. */
4443 pat = XVECEXP (pat, 0, 0);
4445 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4446 /* If this happens, we have to extend this to schedule
4447 optimally. Return 0 for now. */
4450 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4452 if (! recog_memoized (dep_insn))
4454 switch (get_attr_type (dep_insn))
4458 case TYPE_FPSQRTSGL:
4459 case TYPE_FPSQRTDBL:
4460 /* An ALU flop can't be issued until two cycles before a
4461 preceding divide or sqrt operation has finished if
4462 the target of the ALU flop is any of the sources
4463 (or destination) of the divide or sqrt operation. */
4464 return insn_default_latency (dep_insn) - 2;
4472 /* For other anti dependencies, the cost is 0. */
4475 case REG_DEP_OUTPUT:
4476 /* Output dependency; DEP_INSN writes a register that INSN writes some
4478 if (attr_type == TYPE_FPLOAD)
4480 rtx pat = PATTERN (insn);
4481 rtx dep_pat = PATTERN (dep_insn);
4482 if (GET_CODE (pat) == PARALLEL)
4484 /* This happens for the fldXs,mb patterns. */
4485 pat = XVECEXP (pat, 0, 0);
4487 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4488 /* If this happens, we have to extend this to schedule
4489 optimally. Return 0 for now. */
4492 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4494 if (! recog_memoized (dep_insn))
4496 switch (get_attr_type (dep_insn))
4503 case TYPE_FPSQRTSGL:
4504 case TYPE_FPSQRTDBL:
4505 /* A fpload can't be issued until one cycle before a
4506 preceding arithmetic operation has finished if
4507 the target of the fpload is the destination of the
4508 arithmetic operation.
4510 Exception: For PA7100LC, PA7200 and PA7300, the cost
4511 is 3 cycles, unless they bundle together. We also
4512 pay the penalty if the second insn is a fpload. */
4513 return insn_default_latency (dep_insn) - 1;
4520 else if (attr_type == TYPE_FPALU)
4522 rtx pat = PATTERN (insn);
4523 rtx dep_pat = PATTERN (dep_insn);
4524 if (GET_CODE (pat) == PARALLEL)
4526 /* This happens for the fldXs,mb patterns. */
4527 pat = XVECEXP (pat, 0, 0);
4529 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4530 /* If this happens, we have to extend this to schedule
4531 optimally. Return 0 for now. */
4534 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4536 if (! recog_memoized (dep_insn))
4538 switch (get_attr_type (dep_insn))
4542 case TYPE_FPSQRTSGL:
4543 case TYPE_FPSQRTDBL:
4544 /* An ALU flop can't be issued until two cycles before a
4545 preceding divide or sqrt operation has finished if
4546 the target of the ALU flop is also the target of
4547 the divide or sqrt operation. */
4548 return insn_default_latency (dep_insn) - 2;
4556 /* For other output dependencies, the cost is 0. */
4564 /* Adjust scheduling priorities. We use this to try and keep addil
4565 and the next use of %r1 close together. */
4567 pa_adjust_priority (rtx insn, int priority)
4569 rtx set = single_set (insn);
4573 src = SET_SRC (set);
4574 dest = SET_DEST (set);
4575 if (GET_CODE (src) == LO_SUM
4576 && symbolic_operand (XEXP (src, 1), VOIDmode)
4577 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4580 else if (GET_CODE (src) == MEM
4581 && GET_CODE (XEXP (src, 0)) == LO_SUM
4582 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4583 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4586 else if (GET_CODE (dest) == MEM
4587 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4588 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4589 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4595 /* The 700 can only issue a single insn at a time.
4596 The 7XXX processors can issue two insns at a time.
4597 The 8000 can issue 4 insns at a time. */
4599 pa_issue_rate (void)
4603 case PROCESSOR_700: return 1;
4604 case PROCESSOR_7100: return 2;
4605 case PROCESSOR_7100LC: return 2;
4606 case PROCESSOR_7200: return 2;
4607 case PROCESSOR_7300: return 2;
4608 case PROCESSOR_8000: return 4;
4617 /* Return any length adjustment needed by INSN which already has its length
4618 computed as LENGTH. Return zero if no adjustment is necessary.
4620 For the PA: function calls, millicode calls, and backwards short
4621 conditional branches with unfilled delay slots need an adjustment by +1
4622 (to account for the NOP which will be inserted into the instruction stream).
4624 Also compute the length of an inline block move here as it is too
4625 complicated to express as a length attribute in pa.md. */
4627 pa_adjust_insn_length (rtx insn, int length)
4629 rtx pat = PATTERN (insn);
4631 /* Jumps inside switch tables which have unfilled delay slots need
4633 if (GET_CODE (insn) == JUMP_INSN
4634 && GET_CODE (pat) == PARALLEL
4635 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4637 /* Millicode insn with an unfilled delay slot. */
4638 else if (GET_CODE (insn) == INSN
4639 && GET_CODE (pat) != SEQUENCE
4640 && GET_CODE (pat) != USE
4641 && GET_CODE (pat) != CLOBBER
4642 && get_attr_type (insn) == TYPE_MILLI)
4644 /* Block move pattern. */
4645 else if (GET_CODE (insn) == INSN
4646 && GET_CODE (pat) == PARALLEL
4647 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4648 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4649 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4650 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4651 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4652 return compute_movmem_length (insn) - 4;
4653 /* Block clear pattern. */
4654 else if (GET_CODE (insn) == INSN
4655 && GET_CODE (pat) == PARALLEL
4656 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4657 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4658 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4659 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4660 return compute_clrmem_length (insn) - 4;
4661 /* Conditional branch with an unfilled delay slot. */
4662 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4664 /* Adjust a short backwards conditional with an unfilled delay slot. */
4665 if (GET_CODE (pat) == SET
4667 && ! forward_branch_p (insn))
4669 else if (GET_CODE (pat) == PARALLEL
4670 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4673 /* Adjust dbra insn with short backwards conditional branch with
4674 unfilled delay slot -- only for case where counter is in a
4675 general register register. */
4676 else if (GET_CODE (pat) == PARALLEL
4677 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4678 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4679 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4681 && ! forward_branch_p (insn))
4689 /* Print operand X (an rtx) in assembler syntax to file FILE.
4690 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4691 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4694 print_operand (FILE *file, rtx x, int code)
4699 /* Output a 'nop' if there's nothing for the delay slot. */
4700 if (dbr_sequence_length () == 0)
4701 fputs ("\n\tnop", file);
4704 /* Output a nullification completer if there's nothing for the */
4705 /* delay slot or nullification is requested. */
4706 if (dbr_sequence_length () == 0 ||
4708 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4712 /* Print out the second register name of a register pair.
4713 I.e., R (6) => 7. */
4714 fputs (reg_names[REGNO (x) + 1], file);
4717 /* A register or zero. */
4719 || (x == CONST0_RTX (DFmode))
4720 || (x == CONST0_RTX (SFmode)))
4722 fputs ("%r0", file);
4728 /* A register or zero (floating point). */
4730 || (x == CONST0_RTX (DFmode))
4731 || (x == CONST0_RTX (SFmode)))
4733 fputs ("%fr0", file);
4742 xoperands[0] = XEXP (XEXP (x, 0), 0);
4743 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4744 output_global_address (file, xoperands[1], 0);
4745 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4749 case 'C': /* Plain (C)ondition */
4751 switch (GET_CODE (x))
4754 fputs ("=", file); break;
4756 fputs ("<>", file); break;
4758 fputs (">", file); break;
4760 fputs (">=", file); break;
4762 fputs (">>=", file); break;
4764 fputs (">>", file); break;
4766 fputs ("<", file); break;
4768 fputs ("<=", file); break;
4770 fputs ("<<=", file); break;
4772 fputs ("<<", file); break;
4777 case 'N': /* Condition, (N)egated */
4778 switch (GET_CODE (x))
4781 fputs ("<>", file); break;
4783 fputs ("=", file); break;
4785 fputs ("<=", file); break;
4787 fputs ("<", file); break;
4789 fputs ("<<", file); break;
4791 fputs ("<<=", file); break;
4793 fputs (">=", file); break;
4795 fputs (">", file); break;
4797 fputs (">>", file); break;
4799 fputs (">>=", file); break;
4804 /* For floating point comparisons. Note that the output
4805 predicates are the complement of the desired mode. The
4806 conditions for GT, GE, LT, LE and LTGT cause an invalid
4807 operation exception if the result is unordered and this
4808 exception is enabled in the floating-point status register. */
4810 switch (GET_CODE (x))
4813 fputs ("!=", file); break;
4815 fputs ("=", file); break;
4817 fputs ("!>", file); break;
4819 fputs ("!>=", file); break;
4821 fputs ("!<", file); break;
4823 fputs ("!<=", file); break;
4825 fputs ("!<>", file); break;
4827 fputs ("!?<=", file); break;
4829 fputs ("!?<", file); break;
4831 fputs ("!?>=", file); break;
4833 fputs ("!?>", file); break;
4835 fputs ("!?=", file); break;
4837 fputs ("!?", file); break;
4839 fputs ("?", file); break;
4844 case 'S': /* Condition, operands are (S)wapped. */
4845 switch (GET_CODE (x))
4848 fputs ("=", file); break;
4850 fputs ("<>", file); break;
4852 fputs ("<", file); break;
4854 fputs ("<=", file); break;
4856 fputs ("<<=", file); break;
4858 fputs ("<<", file); break;
4860 fputs (">", file); break;
4862 fputs (">=", file); break;
4864 fputs (">>=", file); break;
4866 fputs (">>", file); break;
4871 case 'B': /* Condition, (B)oth swapped and negate. */
4872 switch (GET_CODE (x))
4875 fputs ("<>", file); break;
4877 fputs ("=", file); break;
4879 fputs (">=", file); break;
4881 fputs (">", file); break;
4883 fputs (">>", file); break;
4885 fputs (">>=", file); break;
4887 fputs ("<=", file); break;
4889 fputs ("<", file); break;
4891 fputs ("<<", file); break;
4893 fputs ("<<=", file); break;
4899 gcc_assert (GET_CODE (x) == CONST_INT);
4900 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4903 gcc_assert (GET_CODE (x) == CONST_INT);
4904 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4907 gcc_assert (GET_CODE (x) == CONST_INT);
4908 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4911 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4912 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4915 gcc_assert (GET_CODE (x) == CONST_INT);
4916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4919 gcc_assert (GET_CODE (x) == CONST_INT);
4920 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4923 if (GET_CODE (x) == CONST_INT)
4928 switch (GET_CODE (XEXP (x, 0)))
4932 if (ASSEMBLER_DIALECT == 0)
4933 fputs ("s,mb", file);
4935 fputs (",mb", file);
4939 if (ASSEMBLER_DIALECT == 0)
4940 fputs ("s,ma", file);
4942 fputs (",ma", file);
4945 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
4946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
4948 if (ASSEMBLER_DIALECT == 0)
4951 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4952 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4954 if (ASSEMBLER_DIALECT == 0)
4955 fputs ("x,s", file);
4959 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4963 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4969 output_global_address (file, x, 0);
4972 output_global_address (file, x, 1);
4974 case 0: /* Don't do anything special */
4979 compute_zdepwi_operands (INTVAL (x), op);
4980 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4986 compute_zdepdi_operands (INTVAL (x), op);
4987 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4991 /* We can get here from a .vtable_inherit due to our
4992 CONSTANT_ADDRESS_P rejecting perfectly good constant
4998 if (GET_CODE (x) == REG)
5000 fputs (reg_names [REGNO (x)], file);
5001 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5007 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5008 && (REGNO (x) & 1) == 0)
5011 else if (GET_CODE (x) == MEM)
5013 int size = GET_MODE_SIZE (GET_MODE (x));
5014 rtx base = NULL_RTX;
5015 switch (GET_CODE (XEXP (x, 0)))
5019 base = XEXP (XEXP (x, 0), 0);
5020 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5024 base = XEXP (XEXP (x, 0), 0);
5025 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5028 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5029 fprintf (file, "%s(%s)",
5030 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5031 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5032 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5033 fprintf (file, "%s(%s)",
5034 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5035 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5036 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5037 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5039 /* Because the REG_POINTER flag can get lost during reload,
5040 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5041 index and base registers in the combined move patterns. */
5042 rtx base = XEXP (XEXP (x, 0), 1);
5043 rtx index = XEXP (XEXP (x, 0), 0);
5045 fprintf (file, "%s(%s)",
5046 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5049 output_address (XEXP (x, 0));
5052 output_address (XEXP (x, 0));
5057 output_addr_const (file, x);
5060 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5063 output_global_address (FILE *file, rtx x, int round_constant)
5066 /* Imagine (high (const (plus ...))). */
5067 if (GET_CODE (x) == HIGH)
5070 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5071 output_addr_const (file, x);
5072 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5074 output_addr_const (file, x);
5075 fputs ("-$global$", file);
5077 else if (GET_CODE (x) == CONST)
5079 const char *sep = "";
5080 int offset = 0; /* assembler wants -$global$ at end */
5081 rtx base = NULL_RTX;
5083 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5086 base = XEXP (XEXP (x, 0), 0);
5087 output_addr_const (file, base);
5090 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5096 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5099 base = XEXP (XEXP (x, 0), 1);
5100 output_addr_const (file, base);
5103 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5109 /* How bogus. The compiler is apparently responsible for
5110 rounding the constant if it uses an LR field selector.
5112 The linker and/or assembler seem a better place since
5113 they have to do this kind of thing already.
5115 If we fail to do this, HP's optimizing linker may eliminate
5116 an addil, but not update the ldw/stw/ldo instruction that
5117 uses the result of the addil. */
5119 offset = ((offset + 0x1000) & ~0x1fff);
5121 switch (GET_CODE (XEXP (x, 0)))
5134 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5142 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5143 fputs ("-$global$", file);
5145 fprintf (file, "%s%d", sep, offset);
5148 output_addr_const (file, x);
5151 /* Output boilerplate text to appear at the beginning of the file.
5152 There are several possible versions. */
5153 #define aputs(x) fputs(x, asm_out_file)
5155 pa_file_start_level (void)
5158 aputs ("\t.LEVEL 2.0w\n");
5159 else if (TARGET_PA_20)
5160 aputs ("\t.LEVEL 2.0\n");
5161 else if (TARGET_PA_11)
5162 aputs ("\t.LEVEL 1.1\n");
5164 aputs ("\t.LEVEL 1.0\n");
5168 pa_file_start_space (int sortspace)
5170 aputs ("\t.SPACE $PRIVATE$");
5173 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5174 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5175 "\n\t.SPACE $TEXT$");
5178 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5179 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5183 pa_file_start_file (int want_version)
5185 if (write_symbols != NO_DEBUG)
5187 output_file_directive (asm_out_file, main_input_filename);
5189 aputs ("\t.version\t\"01.01\"\n");
5194 pa_file_start_mcount (const char *aswhat)
5197 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5201 pa_elf_file_start (void)
5203 pa_file_start_level ();
5204 pa_file_start_mcount ("ENTRY");
5205 pa_file_start_file (0);
5209 pa_som_file_start (void)
5211 pa_file_start_level ();
5212 pa_file_start_space (0);
5213 aputs ("\t.IMPORT $global$,DATA\n"
5214 "\t.IMPORT $$dyncall,MILLICODE\n");
5215 pa_file_start_mcount ("CODE");
5216 pa_file_start_file (0);
5220 pa_linux_file_start (void)
5222 pa_file_start_file (1);
5223 pa_file_start_level ();
5224 pa_file_start_mcount ("CODE");
5228 pa_hpux64_gas_file_start (void)
5230 pa_file_start_level ();
5231 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5233 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5235 pa_file_start_file (1);
5239 pa_hpux64_hpas_file_start (void)
5241 pa_file_start_level ();
5242 pa_file_start_space (1);
5243 pa_file_start_mcount ("CODE");
5244 pa_file_start_file (0);
5248 /* Search the deferred plabel list for SYMBOL and return its internal
5249 label. If an entry for SYMBOL is not found, a new entry is created. */
5252 get_deferred_plabel (rtx symbol)
5254 const char *fname = XSTR (symbol, 0);
5257 /* See if we have already put this function on the list of deferred
5258 plabels. This list is generally small, so a liner search is not
5259 too ugly. If it proves too slow replace it with something faster. */
5260 for (i = 0; i < n_deferred_plabels; i++)
5261 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5264 /* If the deferred plabel list is empty, or this entry was not found
5265 on the list, create a new entry on the list. */
5266 if (deferred_plabels == NULL || i == n_deferred_plabels)
5270 if (deferred_plabels == 0)
5271 deferred_plabels = (struct deferred_plabel *)
5272 ggc_alloc (sizeof (struct deferred_plabel));
5274 deferred_plabels = (struct deferred_plabel *)
5275 ggc_realloc (deferred_plabels,
5276 ((n_deferred_plabels + 1)
5277 * sizeof (struct deferred_plabel)));
5279 i = n_deferred_plabels++;
5280 deferred_plabels[i].internal_label = gen_label_rtx ();
5281 deferred_plabels[i].symbol = symbol;
5283 /* Gross. We have just implicitly taken the address of this
5284 function. Mark it in the same manner as assemble_name. */
5285 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5287 mark_referenced (id);
5290 return deferred_plabels[i].internal_label;
5294 output_deferred_plabels (void)
5297 /* If we have deferred plabels, then we need to switch into the data
5298 section and align it to a 4 byte boundary before we output the
5299 deferred plabels. */
5300 if (n_deferred_plabels)
5303 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5306 /* Now output the deferred plabels. */
5307 for (i = 0; i < n_deferred_plabels; i++)
5309 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5310 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5311 assemble_integer (deferred_plabels[i].symbol,
5312 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5316 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5317 /* Initialize optabs to point to HPUX long double emulation routines. */
5319 pa_hpux_init_libfuncs (void)
5321 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5322 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5323 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5324 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5325 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5326 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5327 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5328 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5329 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5331 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5332 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5333 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5334 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5335 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5336 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5337 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5339 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5340 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5341 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5342 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5344 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5345 ? "__U_Qfcnvfxt_quad_to_sgl"
5346 : "_U_Qfcnvfxt_quad_to_sgl");
5347 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5348 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5349 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5351 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5352 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5356 /* HP's millicode routines mean something special to the assembler.
5357 Keep track of which ones we have used. */
5359 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5360 static void import_milli (enum millicodes);
5361 static char imported[(int) end1000];
5362 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5363 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5364 #define MILLI_START 10
5367 import_milli (enum millicodes code)
5369 char str[sizeof (import_string)];
5371 if (!imported[(int) code])
5373 imported[(int) code] = 1;
5374 strcpy (str, import_string);
5375 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5376 output_asm_insn (str, 0);
5380 /* The register constraints have put the operands and return value in
5381 the proper registers. */
5384 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5386 import_milli (mulI);
5387 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5390 /* Emit the rtl for doing a division by a constant. */
5392 /* Do magic division millicodes exist for this value? */
5393 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5395 /* We'll use an array to keep track of the magic millicodes and
5396 whether or not we've used them already. [n][0] is signed, [n][1] is
5399 static int div_milli[16][2];
5402 emit_hpdiv_const (rtx *operands, int unsignedp)
5404 if (GET_CODE (operands[2]) == CONST_INT
5405 && INTVAL (operands[2]) > 0
5406 && INTVAL (operands[2]) < 16
5407 && magic_milli[INTVAL (operands[2])])
5409 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5411 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5415 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5416 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5418 gen_rtx_REG (SImode, 26),
5420 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5421 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5422 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5423 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5424 gen_rtx_CLOBBER (VOIDmode, ret))));
5425 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5432 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5436 /* If the divisor is a constant, try to use one of the special
5438 if (GET_CODE (operands[0]) == CONST_INT)
5440 static char buf[100];
5441 divisor = INTVAL (operands[0]);
5442 if (!div_milli[divisor][unsignedp])
5444 div_milli[divisor][unsignedp] = 1;
5446 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5448 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5452 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5453 INTVAL (operands[0]));
5454 return output_millicode_call (insn,
5455 gen_rtx_SYMBOL_REF (SImode, buf));
5459 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5460 INTVAL (operands[0]));
5461 return output_millicode_call (insn,
5462 gen_rtx_SYMBOL_REF (SImode, buf));
5465 /* Divisor isn't a special constant. */
5470 import_milli (divU);
5471 return output_millicode_call (insn,
5472 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5476 import_milli (divI);
5477 return output_millicode_call (insn,
5478 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5483 /* Output a $$rem millicode to do mod. */
5486 output_mod_insn (int unsignedp, rtx insn)
5490 import_milli (remU);
5491 return output_millicode_call (insn,
5492 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5496 import_milli (remI);
5497 return output_millicode_call (insn,
5498 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5503 output_arg_descriptor (rtx call_insn)
5505 const char *arg_regs[4];
5506 enum machine_mode arg_mode;
5508 int i, output_flag = 0;
5511 /* We neither need nor want argument location descriptors for the
5512 64bit runtime environment or the ELF32 environment. */
5513 if (TARGET_64BIT || TARGET_ELF32)
5516 for (i = 0; i < 4; i++)
5519 /* Specify explicitly that no argument relocations should take place
5520 if using the portable runtime calling conventions. */
5521 if (TARGET_PORTABLE_RUNTIME)
5523 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5528 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5529 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5530 link; link = XEXP (link, 1))
5532 rtx use = XEXP (link, 0);
5534 if (! (GET_CODE (use) == USE
5535 && GET_CODE (XEXP (use, 0)) == REG
5536 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5539 arg_mode = GET_MODE (XEXP (use, 0));
5540 regno = REGNO (XEXP (use, 0));
5541 if (regno >= 23 && regno <= 26)
5543 arg_regs[26 - regno] = "GR";
5544 if (arg_mode == DImode)
5545 arg_regs[25 - regno] = "GR";
5547 else if (regno >= 32 && regno <= 39)
5549 if (arg_mode == SFmode)
5550 arg_regs[(regno - 32) / 2] = "FR";
5553 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5554 arg_regs[(regno - 34) / 2] = "FR";
5555 arg_regs[(regno - 34) / 2 + 1] = "FU";
5557 arg_regs[(regno - 34) / 2] = "FU";
5558 arg_regs[(regno - 34) / 2 + 1] = "FR";
5563 fputs ("\t.CALL ", asm_out_file);
5564 for (i = 0; i < 4; i++)
5569 fputc (',', asm_out_file);
5570 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5573 fputc ('\n', asm_out_file);
5576 static enum reg_class
5577 pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
5578 enum machine_mode mode, secondary_reload_info *sri)
5580 int is_symbolic, regno;
5582 /* Handle the easy stuff first. */
5583 if (class == R1_REGS)
5589 if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5595 /* If we have something like (mem (mem (...)), we can safely assume the
5596 inner MEM will end up in a general register after reloading, so there's
5597 no need for a secondary reload. */
5598 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5601 /* Trying to load a constant into a FP register during PIC code
5602 generation requires %r1 as a scratch register. */
5604 && GET_MODE_CLASS (mode) == MODE_INT
5605 && FP_REG_CLASS_P (class)
5606 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5608 gcc_assert (mode == SImode || mode == DImode);
5609 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5610 : CODE_FOR_reload_indi_r1);
5614 /* Profiling showed the PA port spends about 1.3% of its compilation
5615 time in true_regnum from calls inside pa_secondary_reload_class. */
5616 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5617 regno = true_regnum (x);
5619 /* Handle out of range displacement for integer mode loads/stores of
5621 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5622 && GET_MODE_CLASS (mode) == MODE_INT
5623 && FP_REG_CLASS_P (class))
5624 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5626 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5630 /* A SAR<->FP register copy requires a secondary register (GPR) as
5631 well as secondary memory. */
5632 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5633 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5634 || (class == SHIFT_REGS
5635 && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5637 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5641 /* Secondary reloads of symbolic operands require %r1 as a scratch
5642 register when we're generating PIC code and the operand isn't
5644 if (GET_CODE (x) == HIGH)
5647 /* Profiling has showed GCC spends about 2.6% of its compilation
5648 time in symbolic_operand from calls inside pa_secondary_reload_class.
5649 So, we use an inline copy to avoid useless work. */
5650 switch (GET_CODE (x))
5655 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5662 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5663 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5664 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5665 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5672 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5674 gcc_assert (mode == SImode || mode == DImode);
5675 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5676 : CODE_FOR_reload_indi_r1);
5682 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5683 by invisible reference. As a GCC extension, we also pass anything
5684 with a zero or variable size by reference.
5686 The 64-bit runtime does not describe passing any types by invisible
5687 reference. The internals of GCC can't currently handle passing
5688 empty structures, and zero or variable length arrays when they are
5689 not passed entirely on the stack or by reference. Thus, as a GCC
5690 extension, we pass these types by reference. The HP compiler doesn't
5691 support these types, so hopefully there shouldn't be any compatibility
5692 issues. This may have to be revisited when HP releases a C99 compiler
5693 or updates the ABI. */
5696 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5697 enum machine_mode mode, tree type,
5698 bool named ATTRIBUTE_UNUSED)
5703 size = int_size_in_bytes (type);
5705 size = GET_MODE_SIZE (mode);
5710 return size <= 0 || size > 8;
5714 function_arg_padding (enum machine_mode mode, tree type)
5717 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5719 /* Return none if justification is not required. */
5721 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5722 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5725 /* The directions set here are ignored when a BLKmode argument larger
5726 than a word is placed in a register. Different code is used for
5727 the stack and registers. This makes it difficult to have a
5728 consistent data representation for both the stack and registers.
5729 For both runtimes, the justification and padding for arguments on
5730 the stack and in registers should be identical. */
5732 /* The 64-bit runtime specifies left justification for aggregates. */
5735 /* The 32-bit runtime architecture specifies right justification.
5736 When the argument is passed on the stack, the argument is padded
5737 with garbage on the left. The HP compiler pads with zeros. */
5741 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5748 /* Do what is necessary for `va_start'. We look at the current function
5749 to determine if stdargs or varargs is used and fill in an initial
5750 va_list. A pointer to this constructor is returned. */
5753 hppa_builtin_saveregs (void)
5756 tree fntype = TREE_TYPE (current_function_decl);
5757 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5758 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5759 != void_type_node)))
5760 ? UNITS_PER_WORD : 0);
5763 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5765 offset = current_function_arg_offset_rtx;
5771 /* Adjust for varargs/stdarg differences. */
5773 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5775 offset = current_function_arg_offset_rtx;
5777 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5778 from the incoming arg pointer and growing to larger addresses. */
5779 for (i = 26, off = -64; i >= 19; i--, off += 8)
5780 emit_move_insn (gen_rtx_MEM (word_mode,
5781 plus_constant (arg_pointer_rtx, off)),
5782 gen_rtx_REG (word_mode, i));
5784 /* The incoming args pointer points just beyond the flushback area;
5785 normally this is not a serious concern. However, when we are doing
5786 varargs/stdargs we want to make the arg pointer point to the start
5787 of the incoming argument area. */
5788 emit_move_insn (virtual_incoming_args_rtx,
5789 plus_constant (arg_pointer_rtx, -64));
5791 /* Now return a pointer to the first anonymous argument. */
5792 return copy_to_reg (expand_binop (Pmode, add_optab,
5793 virtual_incoming_args_rtx,
5794 offset, 0, 0, OPTAB_LIB_WIDEN));
5797 /* Store general registers on the stack. */
5798 dest = gen_rtx_MEM (BLKmode,
5799 plus_constant (current_function_internal_arg_pointer,
5801 set_mem_alias_set (dest, get_varargs_alias_set ());
5802 set_mem_align (dest, BITS_PER_WORD);
5803 move_block_from_reg (23, dest, 4);
5805 /* move_block_from_reg will emit code to store the argument registers
5806 individually as scalar stores.
5808 However, other insns may later load from the same addresses for
5809 a structure load (passing a struct to a varargs routine).
5811 The alias code assumes that such aliasing can never happen, so we
5812 have to keep memory referencing insns from moving up beyond the
5813 last argument register store. So we emit a blockage insn here. */
5814 emit_insn (gen_blockage ());
5816 return copy_to_reg (expand_binop (Pmode, add_optab,
5817 current_function_internal_arg_pointer,
5818 offset, 0, 0, OPTAB_LIB_WIDEN));
5822 hppa_va_start (tree valist, rtx nextarg)
5824 nextarg = expand_builtin_saveregs ();
5825 std_expand_builtin_va_start (valist, nextarg);
5829 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5833 /* Args grow upward. We can use the generic routines. */
5834 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5836 else /* !TARGET_64BIT */
5838 tree ptr = build_pointer_type (type);
5841 unsigned int size, ofs;
5844 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5848 ptr = build_pointer_type (type);
5850 size = int_size_in_bytes (type);
5851 valist_type = TREE_TYPE (valist);
5853 /* Args grow down. Not handled by generic routines. */
5855 u = fold_convert (valist_type, size_in_bytes (type));
5856 t = build2 (MINUS_EXPR, valist_type, valist, u);
5858 /* Copied from va-pa.h, but we probably don't need to align to
5859 word size, since we generate and preserve that invariant. */
5860 u = build_int_cst (valist_type, (size > 4 ? -8 : -4));
5861 t = build2 (BIT_AND_EXPR, valist_type, t, u);
5863 t = build2 (MODIFY_EXPR, valist_type, valist, t);
5865 ofs = (8 - size) % 4;
5868 u = fold_convert (valist_type, size_int (ofs));
5869 t = build2 (PLUS_EXPR, valist_type, t, u);
5872 t = fold_convert (ptr, t);
5873 t = build_va_arg_indirect_ref (t);
5876 t = build_va_arg_indirect_ref (t);
5882 /* True if MODE is valid for the target. By "valid", we mean able to
5883 be manipulated in non-trivial ways. In particular, this means all
5884 the arithmetic is supported.
5886 Currently, TImode is not valid as the HP 64-bit runtime documentation
5887 doesn't document the alignment and calling conventions for this type.
5888 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5889 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
5892 pa_scalar_mode_supported_p (enum machine_mode mode)
5894 int precision = GET_MODE_PRECISION (mode);
5896 switch (GET_MODE_CLASS (mode))
5898 case MODE_PARTIAL_INT:
5900 if (precision == CHAR_TYPE_SIZE)
5902 if (precision == SHORT_TYPE_SIZE)
5904 if (precision == INT_TYPE_SIZE)
5906 if (precision == LONG_TYPE_SIZE)
5908 if (precision == LONG_LONG_TYPE_SIZE)
5913 if (precision == FLOAT_TYPE_SIZE)
5915 if (precision == DOUBLE_TYPE_SIZE)
5917 if (precision == LONG_DOUBLE_TYPE_SIZE)
5926 /* This routine handles all the normal conditional branch sequences we
5927 might need to generate. It handles compare immediate vs compare
5928 register, nullification of delay slots, varying length branches,
5929 negated branches, and all combinations of the above. It returns the
5930 output appropriate to emit the branch corresponding to all given
5934 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
5936 static char buf[100];
5940 /* A conditional branch to the following instruction (e.g. the delay slot)
5941 is asking for a disaster. This can happen when not optimizing and
5942 when jump optimization fails.
5944 While it is usually safe to emit nothing, this can fail if the
5945 preceding instruction is a nullified branch with an empty delay
5946 slot and the same branch target as this branch. We could check
5947 for this but jump optimization should eliminate nop jumps. It
5948 is always safe to emit a nop. */
5949 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5952 /* The doubleword form of the cmpib instruction doesn't have the LEU
5953 and GTU conditions while the cmpb instruction does. Since we accept
5954 zero for cmpb, we must ensure that we use cmpb for the comparison. */
5955 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
5956 operands[2] = gen_rtx_REG (DImode, 0);
5957 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
5958 operands[1] = gen_rtx_REG (DImode, 0);
5960 /* If this is a long branch with its delay slot unfilled, set `nullify'
5961 as it can nullify the delay slot and save a nop. */
5962 if (length == 8 && dbr_sequence_length () == 0)
5965 /* If this is a short forward conditional branch which did not get
5966 its delay slot filled, the delay slot can still be nullified. */
5967 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5968 nullify = forward_branch_p (insn);
5970 /* A forward branch over a single nullified insn can be done with a
5971 comclr instruction. This avoids a single cycle penalty due to
5972 mis-predicted branch if we fall through (branch not taken). */
5974 && next_real_insn (insn) != 0
5975 && get_attr_length (next_real_insn (insn)) == 4
5976 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5982 /* All short conditional branches except backwards with an unfilled
5986 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5988 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5989 if (GET_MODE (operands[1]) == DImode)
5992 strcat (buf, "%B3");
5994 strcat (buf, "%S3");
5996 strcat (buf, " %2,%r1,%%r0");
5998 strcat (buf, ",n %2,%r1,%0");
6000 strcat (buf, " %2,%r1,%0");
6003 /* All long conditionals. Note a short backward branch with an
6004 unfilled delay slot is treated just like a long backward branch
6005 with an unfilled delay slot. */
6007 /* Handle weird backwards branch with a filled delay slot
6008 with is nullified. */
6009 if (dbr_sequence_length () != 0
6010 && ! forward_branch_p (insn)
6013 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6014 if (GET_MODE (operands[1]) == DImode)
6017 strcat (buf, "%S3");
6019 strcat (buf, "%B3");
6020 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6022 /* Handle short backwards branch with an unfilled delay slot.
6023 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6024 taken and untaken branches. */
6025 else if (dbr_sequence_length () == 0
6026 && ! forward_branch_p (insn)
6027 && INSN_ADDRESSES_SET_P ()
6028 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6029 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6031 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6032 if (GET_MODE (operands[1]) == DImode)
6035 strcat (buf, "%B3 %2,%r1,%0%#");
6037 strcat (buf, "%S3 %2,%r1,%0%#");
6041 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6042 if (GET_MODE (operands[1]) == DImode)
6045 strcat (buf, "%S3");
6047 strcat (buf, "%B3");
6049 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6051 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6057 xoperands[0] = operands[0];
6058 xoperands[1] = operands[1];
6059 xoperands[2] = operands[2];
6060 xoperands[3] = operands[3];
6062 /* The reversed conditional branch must branch over one additional
6063 instruction if the delay slot is filled. If the delay slot
6064 is empty, the instruction after the reversed condition branch
6065 must be nullified. */
6066 nullify = dbr_sequence_length () == 0;
6067 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6069 /* Create a reversed conditional branch which branches around
6070 the following insns. */
6071 if (GET_MODE (operands[1]) != DImode)
6077 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6080 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6086 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6089 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6098 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6101 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6107 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6110 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6114 output_asm_insn (buf, xoperands);
6115 return output_lbranch (operands[0], insn);
6123 /* This routine handles long unconditional branches that exceed the
6124 maximum range of a simple branch instruction. */
6127 output_lbranch (rtx dest, rtx insn)
6131 xoperands[0] = dest;
6133 /* First, free up the delay slot. */
6134 if (dbr_sequence_length () != 0)
6136 /* We can't handle a jump in the delay slot. */
6137 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6139 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6142 /* Now delete the delay insn. */
6143 PUT_CODE (NEXT_INSN (insn), NOTE);
6144 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6145 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6148 /* Output an insn to save %r1. The runtime documentation doesn't
6149 specify whether the "Clean Up" slot in the callers frame can
6150 be clobbered by the callee. It isn't copied by HP's builtin
6151 alloca, so this suggests that it can be clobbered if necessary.
6152 The "Static Link" location is copied by HP builtin alloca, so
6153 we avoid using it. Using the cleanup slot might be a problem
6154 if we have to interoperate with languages that pass cleanup
6155 information. However, it should be possible to handle these
6156 situations with GCC's asm feature.
6158 The "Current RP" slot is reserved for the called procedure, so
6159 we try to use it when we don't have a frame of our own. It's
6160 rather unlikely that we won't have a frame when we need to emit
6163 Really the way to go long term is a register scavenger; goto
6164 the target of the jump and find a register which we can use
6165 as a scratch to hold the value in %r1. Then, we wouldn't have
6166 to free up the delay slot or clobber a slot that may be needed
6167 for other purposes. */
6170 if (actual_fsize == 0 && !regs_ever_live[2])
6171 /* Use the return pointer slot in the frame marker. */
6172 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6174 /* Use the slot at -40 in the frame marker since HP builtin
6175 alloca doesn't copy it. */
6176 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6180 if (actual_fsize == 0 && !regs_ever_live[2])
6181 /* Use the return pointer slot in the frame marker. */
6182 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6184 /* Use the "Clean Up" slot in the frame marker. In GCC,
6185 the only other use of this location is for copying a
6186 floating point double argument from a floating-point
6187 register to two general registers. The copy is done
6188 as an "atomic" operation when outputting a call, so it
6189 won't interfere with our using the location here. */
6190 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6193 if (TARGET_PORTABLE_RUNTIME)
6195 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6196 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6197 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6201 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6202 if (TARGET_SOM || !TARGET_GAS)
6204 xoperands[1] = gen_label_rtx ();
6205 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6206 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6207 CODE_LABEL_NUMBER (xoperands[1]));
6208 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6212 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6213 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6215 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6218 /* Now output a very long branch to the original target. */
6219 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6221 /* Now restore the value of %r1 in the delay slot. */
6224 if (actual_fsize == 0 && !regs_ever_live[2])
6225 return "ldd -16(%%r30),%%r1";
6227 return "ldd -40(%%r30),%%r1";
6231 if (actual_fsize == 0 && !regs_ever_live[2])
6232 return "ldw -20(%%r30),%%r1";
6234 return "ldw -12(%%r30),%%r1";
6238 /* This routine handles all the branch-on-bit conditional branch sequences we
6239 might need to generate. It handles nullification of delay slots,
6240 varying length branches, negated branches and all combinations of the
6241 above. it returns the appropriate output template to emit the branch. */
6244 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6245 int negated, rtx insn, int which)
6247 static char buf[100];
6250 /* A conditional branch to the following instruction (e.g. the delay slot) is
6251 asking for a disaster. I do not think this can happen as this pattern
6252 is only used when optimizing; jump optimization should eliminate the
6253 jump. But be prepared just in case. */
6255 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6258 /* If this is a long branch with its delay slot unfilled, set `nullify'
6259 as it can nullify the delay slot and save a nop. */
6260 if (length == 8 && dbr_sequence_length () == 0)
6263 /* If this is a short forward conditional branch which did not get
6264 its delay slot filled, the delay slot can still be nullified. */
6265 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6266 nullify = forward_branch_p (insn);
6268 /* A forward branch over a single nullified insn can be done with a
6269 extrs instruction. This avoids a single cycle penalty due to
6270 mis-predicted branch if we fall through (branch not taken). */
6273 && next_real_insn (insn) != 0
6274 && get_attr_length (next_real_insn (insn)) == 4
6275 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6282 /* All short conditional branches except backwards with an unfilled
6286 strcpy (buf, "{extrs,|extrw,s,}");
6288 strcpy (buf, "bb,");
6289 if (useskip && GET_MODE (operands[0]) == DImode)
6290 strcpy (buf, "extrd,s,*");
6291 else if (GET_MODE (operands[0]) == DImode)
6292 strcpy (buf, "bb,*");
6293 if ((which == 0 && negated)
6294 || (which == 1 && ! negated))
6299 strcat (buf, " %0,%1,1,%%r0");
6300 else if (nullify && negated)
6301 strcat (buf, ",n %0,%1,%3");
6302 else if (nullify && ! negated)
6303 strcat (buf, ",n %0,%1,%2");
6304 else if (! nullify && negated)
6305 strcat (buf, "%0,%1,%3");
6306 else if (! nullify && ! negated)
6307 strcat (buf, " %0,%1,%2");
6310 /* All long conditionals. Note a short backward branch with an
6311 unfilled delay slot is treated just like a long backward branch
6312 with an unfilled delay slot. */
6314 /* Handle weird backwards branch with a filled delay slot
6315 with is nullified. */
6316 if (dbr_sequence_length () != 0
6317 && ! forward_branch_p (insn)
6320 strcpy (buf, "bb,");
6321 if (GET_MODE (operands[0]) == DImode)
6323 if ((which == 0 && negated)
6324 || (which == 1 && ! negated))
6329 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6331 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6333 /* Handle short backwards branch with an unfilled delay slot.
6334 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6335 taken and untaken branches. */
6336 else if (dbr_sequence_length () == 0
6337 && ! forward_branch_p (insn)
6338 && INSN_ADDRESSES_SET_P ()
6339 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6340 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6342 strcpy (buf, "bb,");
6343 if (GET_MODE (operands[0]) == DImode)
6345 if ((which == 0 && negated)
6346 || (which == 1 && ! negated))
6351 strcat (buf, " %0,%1,%3%#");
6353 strcat (buf, " %0,%1,%2%#");
6357 strcpy (buf, "{extrs,|extrw,s,}");
6358 if (GET_MODE (operands[0]) == DImode)
6359 strcpy (buf, "extrd,s,*");
6360 if ((which == 0 && negated)
6361 || (which == 1 && ! negated))
6365 if (nullify && negated)
6366 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6367 else if (nullify && ! negated)
6368 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6370 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6372 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6382 /* This routine handles all the branch-on-variable-bit conditional branch
6383 sequences we might need to generate. It handles nullification of delay
6384 slots, varying length branches, negated branches and all combinations
6385 of the above. it returns the appropriate output template to emit the
6389 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6390 int negated, rtx insn, int which)
6392 static char buf[100];
6395 /* A conditional branch to the following instruction (e.g. the delay slot) is
6396 asking for a disaster. I do not think this can happen as this pattern
6397 is only used when optimizing; jump optimization should eliminate the
6398 jump. But be prepared just in case. */
6400 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6403 /* If this is a long branch with its delay slot unfilled, set `nullify'
6404 as it can nullify the delay slot and save a nop. */
6405 if (length == 8 && dbr_sequence_length () == 0)
6408 /* If this is a short forward conditional branch which did not get
6409 its delay slot filled, the delay slot can still be nullified. */
6410 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6411 nullify = forward_branch_p (insn);
6413 /* A forward branch over a single nullified insn can be done with a
6414 extrs instruction. This avoids a single cycle penalty due to
6415 mis-predicted branch if we fall through (branch not taken). */
6418 && next_real_insn (insn) != 0
6419 && get_attr_length (next_real_insn (insn)) == 4
6420 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6427 /* All short conditional branches except backwards with an unfilled
6431 strcpy (buf, "{vextrs,|extrw,s,}");
6433 strcpy (buf, "{bvb,|bb,}");
6434 if (useskip && GET_MODE (operands[0]) == DImode)
6435 strcpy (buf, "extrd,s,*");
6436 else if (GET_MODE (operands[0]) == DImode)
6437 strcpy (buf, "bb,*");
6438 if ((which == 0 && negated)
6439 || (which == 1 && ! negated))
6444 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6445 else if (nullify && negated)
6446 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6447 else if (nullify && ! negated)
6448 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6449 else if (! nullify && negated)
6450 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6451 else if (! nullify && ! negated)
6452 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6455 /* All long conditionals. Note a short backward branch with an
6456 unfilled delay slot is treated just like a long backward branch
6457 with an unfilled delay slot. */
6459 /* Handle weird backwards branch with a filled delay slot
6460 with is nullified. */
6461 if (dbr_sequence_length () != 0
6462 && ! forward_branch_p (insn)
6465 strcpy (buf, "{bvb,|bb,}");
6466 if (GET_MODE (operands[0]) == DImode)
6468 if ((which == 0 && negated)
6469 || (which == 1 && ! negated))
6474 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6476 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6478 /* Handle short backwards branch with an unfilled delay slot.
6479 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6480 taken and untaken branches. */
6481 else if (dbr_sequence_length () == 0
6482 && ! forward_branch_p (insn)
6483 && INSN_ADDRESSES_SET_P ()
6484 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6485 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6487 strcpy (buf, "{bvb,|bb,}");
6488 if (GET_MODE (operands[0]) == DImode)
6490 if ((which == 0 && negated)
6491 || (which == 1 && ! negated))
6496 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6498 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6502 strcpy (buf, "{vextrs,|extrw,s,}");
6503 if (GET_MODE (operands[0]) == DImode)
6504 strcpy (buf, "extrd,s,*");
6505 if ((which == 0 && negated)
6506 || (which == 1 && ! negated))
6510 if (nullify && negated)
6511 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6512 else if (nullify && ! negated)
6513 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6515 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6517 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6527 /* Return the output template for emitting a dbra type insn.
6529 Note it may perform some output operations on its own before
6530 returning the final output string. */
6532 output_dbra (rtx *operands, rtx insn, int which_alternative)
6535 /* A conditional branch to the following instruction (e.g. the delay slot) is
6536 asking for a disaster. Be prepared! */
6538 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6540 if (which_alternative == 0)
6541 return "ldo %1(%0),%0";
6542 else if (which_alternative == 1)
6544 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6545 output_asm_insn ("ldw -16(%%r30),%4", operands);
6546 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6547 return "{fldws|fldw} -16(%%r30),%0";
6551 output_asm_insn ("ldw %0,%4", operands);
6552 return "ldo %1(%4),%4\n\tstw %4,%0";
6556 if (which_alternative == 0)
6558 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6559 int length = get_attr_length (insn);
6561 /* If this is a long branch with its delay slot unfilled, set `nullify'
6562 as it can nullify the delay slot and save a nop. */
6563 if (length == 8 && dbr_sequence_length () == 0)
6566 /* If this is a short forward conditional branch which did not get
6567 its delay slot filled, the delay slot can still be nullified. */
6568 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6569 nullify = forward_branch_p (insn);
6575 return "addib,%C2,n %1,%0,%3";
6577 return "addib,%C2 %1,%0,%3";
6580 /* Handle weird backwards branch with a fulled delay slot
6581 which is nullified. */
6582 if (dbr_sequence_length () != 0
6583 && ! forward_branch_p (insn)
6585 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6586 /* Handle short backwards branch with an unfilled delay slot.
6587 Using a addb;nop rather than addi;bl saves 1 cycle for both
6588 taken and untaken branches. */
6589 else if (dbr_sequence_length () == 0
6590 && ! forward_branch_p (insn)
6591 && INSN_ADDRESSES_SET_P ()
6592 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6593 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6594 return "addib,%C2 %1,%0,%3%#";
6596 /* Handle normal cases. */
6598 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6600 return "addi,%N2 %1,%0,%0\n\tb %3";
6607 /* Deal with gross reload from FP register case. */
6608 else if (which_alternative == 1)
6610 /* Move loop counter from FP register to MEM then into a GR,
6611 increment the GR, store the GR into MEM, and finally reload
6612 the FP register from MEM from within the branch's delay slot. */
6613 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6615 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6616 if (get_attr_length (insn) == 24)
6617 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6619 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6621 /* Deal with gross reload from memory case. */
6624 /* Reload loop counter from memory, the store back to memory
6625 happens in the branch's delay slot. */
6626 output_asm_insn ("ldw %0,%4", operands);
6627 if (get_attr_length (insn) == 12)
6628 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6630 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6634 /* Return the output template for emitting a dbra type insn.
6636 Note it may perform some output operations on its own before
6637 returning the final output string. */
6639 output_movb (rtx *operands, rtx insn, int which_alternative,
6640 int reverse_comparison)
6643 /* A conditional branch to the following instruction (e.g. the delay slot) is
6644 asking for a disaster. Be prepared! */
6646 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6648 if (which_alternative == 0)
6649 return "copy %1,%0";
6650 else if (which_alternative == 1)
6652 output_asm_insn ("stw %1,-16(%%r30)", operands);
6653 return "{fldws|fldw} -16(%%r30),%0";
6655 else if (which_alternative == 2)
6661 /* Support the second variant. */
6662 if (reverse_comparison)
6663 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6665 if (which_alternative == 0)
6667 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6668 int length = get_attr_length (insn);
6670 /* If this is a long branch with its delay slot unfilled, set `nullify'
6671 as it can nullify the delay slot and save a nop. */
6672 if (length == 8 && dbr_sequence_length () == 0)
6675 /* If this is a short forward conditional branch which did not get
6676 its delay slot filled, the delay slot can still be nullified. */
6677 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6678 nullify = forward_branch_p (insn);
6684 return "movb,%C2,n %1,%0,%3";
6686 return "movb,%C2 %1,%0,%3";
6689 /* Handle weird backwards branch with a filled delay slot
6690 which is nullified. */
6691 if (dbr_sequence_length () != 0
6692 && ! forward_branch_p (insn)
6694 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6696 /* Handle short backwards branch with an unfilled delay slot.
6697 Using a movb;nop rather than or;bl saves 1 cycle for both
6698 taken and untaken branches. */
6699 else if (dbr_sequence_length () == 0
6700 && ! forward_branch_p (insn)
6701 && INSN_ADDRESSES_SET_P ()
6702 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6703 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6704 return "movb,%C2 %1,%0,%3%#";
6705 /* Handle normal cases. */
6707 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6709 return "or,%N2 %1,%%r0,%0\n\tb %3";
6715 /* Deal with gross reload from FP register case. */
6716 else if (which_alternative == 1)
6718 /* Move loop counter from FP register to MEM then into a GR,
6719 increment the GR, store the GR into MEM, and finally reload
6720 the FP register from MEM from within the branch's delay slot. */
6721 output_asm_insn ("stw %1,-16(%%r30)", operands);
6722 if (get_attr_length (insn) == 12)
6723 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6725 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6727 /* Deal with gross reload from memory case. */
6728 else if (which_alternative == 2)
6730 /* Reload loop counter from memory, the store back to memory
6731 happens in the branch's delay slot. */
6732 if (get_attr_length (insn) == 8)
6733 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6735 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6737 /* Handle SAR as a destination. */
6740 if (get_attr_length (insn) == 8)
6741 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6743 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6747 /* Copy any FP arguments in INSN into integer registers. */
6749 copy_fp_args (rtx insn)
6754 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6756 int arg_mode, regno;
6757 rtx use = XEXP (link, 0);
6759 if (! (GET_CODE (use) == USE
6760 && GET_CODE (XEXP (use, 0)) == REG
6761 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6764 arg_mode = GET_MODE (XEXP (use, 0));
6765 regno = REGNO (XEXP (use, 0));
6767 /* Is it a floating point register? */
6768 if (regno >= 32 && regno <= 39)
6770 /* Copy the FP register into an integer register via memory. */
6771 if (arg_mode == SFmode)
6773 xoperands[0] = XEXP (use, 0);
6774 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6775 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6776 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6780 xoperands[0] = XEXP (use, 0);
6781 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6782 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6783 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6784 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6790 /* Compute length of the FP argument copy sequence for INSN. */
6792 length_fp_args (rtx insn)
6797 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6799 int arg_mode, regno;
6800 rtx use = XEXP (link, 0);
6802 if (! (GET_CODE (use) == USE
6803 && GET_CODE (XEXP (use, 0)) == REG
6804 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6807 arg_mode = GET_MODE (XEXP (use, 0));
6808 regno = REGNO (XEXP (use, 0));
6810 /* Is it a floating point register? */
6811 if (regno >= 32 && regno <= 39)
6813 if (arg_mode == SFmode)
6823 /* Return the attribute length for the millicode call instruction INSN.
6824 The length must match the code generated by output_millicode_call.
6825 We include the delay slot in the returned length as it is better to
6826 over estimate the length than to under estimate it. */
6829 attr_length_millicode_call (rtx insn)
6831 unsigned long distance = -1;
6832 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6834 if (INSN_ADDRESSES_SET_P ())
6836 distance = (total + insn_current_reference_address (insn));
6837 if (distance < total)
6843 if (!TARGET_LONG_CALLS && distance < 7600000)
6848 else if (TARGET_PORTABLE_RUNTIME)
6852 if (!TARGET_LONG_CALLS && distance < 240000)
6855 if (TARGET_LONG_ABS_CALL && !flag_pic)
6862 /* INSN is a function call. It may have an unconditional jump
6865 CALL_DEST is the routine we are calling. */
6868 output_millicode_call (rtx insn, rtx call_dest)
6870 int attr_length = get_attr_length (insn);
6871 int seq_length = dbr_sequence_length ();
6876 xoperands[0] = call_dest;
6877 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6879 /* Handle the common case where we are sure that the branch will
6880 reach the beginning of the $CODE$ subspace. The within reach
6881 form of the $$sh_func_adrs call has a length of 28. Because
6882 it has an attribute type of multi, it never has a nonzero
6883 sequence length. The length of the $$sh_func_adrs is the same
6884 as certain out of reach PIC calls to other routines. */
6885 if (!TARGET_LONG_CALLS
6886 && ((seq_length == 0
6887 && (attr_length == 12
6888 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
6889 || (seq_length != 0 && attr_length == 8)))
6891 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
6897 /* It might seem that one insn could be saved by accessing
6898 the millicode function using the linkage table. However,
6899 this doesn't work in shared libraries and other dynamically
6900 loaded objects. Using a pc-relative sequence also avoids
6901 problems related to the implicit use of the gp register. */
6902 output_asm_insn ("b,l .+8,%%r1", xoperands);
6906 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
6907 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6911 xoperands[1] = gen_label_rtx ();
6912 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6913 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6914 CODE_LABEL_NUMBER (xoperands[1]));
6915 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6918 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6920 else if (TARGET_PORTABLE_RUNTIME)
6922 /* Pure portable runtime doesn't allow be/ble; we also don't
6923 have PIC support in the assembler/linker, so this sequence
6926 /* Get the address of our target into %r1. */
6927 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6928 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6930 /* Get our return address into %r31. */
6931 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
6932 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6934 /* Jump to our target address in %r1. */
6935 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6939 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6941 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6943 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6947 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6948 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
6950 if (TARGET_SOM || !TARGET_GAS)
6952 /* The HP assembler can generate relocations for the
6953 difference of two symbols. GAS can do this for a
6954 millicode symbol but not an arbitrary external
6955 symbol when generating SOM output. */
6956 xoperands[1] = gen_label_rtx ();
6957 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6958 CODE_LABEL_NUMBER (xoperands[1]));
6959 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
6960 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
6964 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
6965 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
6969 /* Jump to our target address in %r1. */
6970 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6974 if (seq_length == 0)
6975 output_asm_insn ("nop", xoperands);
6977 /* We are done if there isn't a jump in the delay slot. */
6978 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6981 /* This call has an unconditional jump in its delay slot. */
6982 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6984 /* See if the return address can be adjusted. Use the containing
6985 sequence insn's address. */
6986 if (INSN_ADDRESSES_SET_P ())
6988 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6989 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6990 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
6992 if (VAL_14_BITS_P (distance))
6994 xoperands[1] = gen_label_rtx ();
6995 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
6996 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6997 CODE_LABEL_NUMBER (xoperands[1]));
7000 /* ??? This branch may not reach its target. */
7001 output_asm_insn ("nop\n\tb,n %0", xoperands);
7004 /* ??? This branch may not reach its target. */
7005 output_asm_insn ("nop\n\tb,n %0", xoperands);
7007 /* Delete the jump. */
7008 PUT_CODE (NEXT_INSN (insn), NOTE);
7009 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7010 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7015 /* Return the attribute length of the call instruction INSN. The SIBCALL
7016 flag indicates whether INSN is a regular call or a sibling call. The
7017 length returned must be longer than the code actually generated by
7018 output_call. Since branch shortening is done before delay branch
7019 sequencing, there is no way to determine whether or not the delay
7020 slot will be filled during branch shortening. Even when the delay
7021 slot is filled, we may have to add a nop if the delay slot contains
7022 a branch that can't reach its target. Thus, we always have to include
7023 the delay slot in the length estimate. This used to be done in
7024 pa_adjust_insn_length but we do it here now as some sequences always
7025 fill the delay slot and we can save four bytes in the estimate for
7029 attr_length_call (rtx insn, int sibcall)
7035 rtx pat = PATTERN (insn);
7036 unsigned long distance = -1;
7038 if (INSN_ADDRESSES_SET_P ())
7040 unsigned long total;
7042 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7043 distance = (total + insn_current_reference_address (insn));
7044 if (distance < total)
7048 /* Determine if this is a local call. */
7049 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7050 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7052 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7054 call_decl = SYMBOL_REF_DECL (call_dest);
7055 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7057 /* pc-relative branch. */
7058 if (!TARGET_LONG_CALLS
7059 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7060 || distance < 240000))
7063 /* 64-bit plabel sequence. */
7064 else if (TARGET_64BIT && !local_call)
7065 length += sibcall ? 28 : 24;
7067 /* non-pic long absolute branch sequence. */
7068 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7071 /* long pc-relative branch sequence. */
7072 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7073 || (TARGET_64BIT && !TARGET_GAS)
7074 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7078 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7082 /* 32-bit plabel sequence. */
7088 length += length_fp_args (insn);
7098 if (!TARGET_NO_SPACE_REGS)
7106 /* INSN is a function call. It may have an unconditional jump
7109 CALL_DEST is the routine we are calling. */
7112 output_call (rtx insn, rtx call_dest, int sibcall)
7114 int delay_insn_deleted = 0;
7115 int delay_slot_filled = 0;
7116 int seq_length = dbr_sequence_length ();
7117 tree call_decl = SYMBOL_REF_DECL (call_dest);
7118 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7121 xoperands[0] = call_dest;
7123 /* Handle the common case where we're sure that the branch will reach
7124 the beginning of the "$CODE$" subspace. This is the beginning of
7125 the current function if we are in a named section. */
7126 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7128 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7129 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7133 if (TARGET_64BIT && !local_call)
7135 /* ??? As far as I can tell, the HP linker doesn't support the
7136 long pc-relative sequence described in the 64-bit runtime
7137 architecture. So, we use a slightly longer indirect call. */
7138 xoperands[0] = get_deferred_plabel (call_dest);
7139 xoperands[1] = gen_label_rtx ();
7141 /* If this isn't a sibcall, we put the load of %r27 into the
7142 delay slot. We can't do this in a sibcall as we don't
7143 have a second call-clobbered scratch register available. */
7145 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7148 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7151 /* Now delete the delay insn. */
7152 PUT_CODE (NEXT_INSN (insn), NOTE);
7153 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7154 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7155 delay_insn_deleted = 1;
7158 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7159 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7160 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7164 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7165 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7166 output_asm_insn ("bve (%%r1)", xoperands);
7170 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7171 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7172 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7173 delay_slot_filled = 1;
7178 int indirect_call = 0;
7180 /* Emit a long call. There are several different sequences
7181 of increasing length and complexity. In most cases,
7182 they don't allow an instruction in the delay slot. */
7183 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7184 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7185 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7190 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7192 && (!TARGET_PA_20 || indirect_call))
7194 /* A non-jump insn in the delay slot. By definition we can
7195 emit this insn before the call (and in fact before argument
7197 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7200 /* Now delete the delay insn. */
7201 PUT_CODE (NEXT_INSN (insn), NOTE);
7202 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7203 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7204 delay_insn_deleted = 1;
7207 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7209 /* This is the best sequence for making long calls in
7210 non-pic code. Unfortunately, GNU ld doesn't provide
7211 the stub needed for external calls, and GAS's support
7212 for this with the SOM linker is buggy. It is safe
7213 to use this for local calls. */
7214 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7216 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7220 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7223 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7225 output_asm_insn ("copy %%r31,%%r2", xoperands);
7226 delay_slot_filled = 1;
7231 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7232 || (TARGET_64BIT && !TARGET_GAS))
7234 /* The HP assembler and linker can handle relocations
7235 for the difference of two symbols. GAS and the HP
7236 linker can't do this when one of the symbols is
7238 xoperands[1] = gen_label_rtx ();
7239 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7240 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7241 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7242 CODE_LABEL_NUMBER (xoperands[1]));
7243 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7245 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7247 /* GAS currently can't generate the relocations that
7248 are needed for the SOM linker under HP-UX using this
7249 sequence. The GNU linker doesn't generate the stubs
7250 that are needed for external calls on TARGET_ELF32
7251 with this sequence. For now, we have to use a
7252 longer plabel sequence when using GAS. */
7253 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7254 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7256 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7261 /* Emit a long plabel-based call sequence. This is
7262 essentially an inline implementation of $$dyncall.
7263 We don't actually try to call $$dyncall as this is
7264 as difficult as calling the function itself. */
7265 xoperands[0] = get_deferred_plabel (call_dest);
7266 xoperands[1] = gen_label_rtx ();
7268 /* Since the call is indirect, FP arguments in registers
7269 need to be copied to the general registers. Then, the
7270 argument relocation stub will copy them back. */
7272 copy_fp_args (insn);
7276 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7277 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7278 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7282 output_asm_insn ("addil LR'%0-$global$,%%r27",
7284 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7288 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7289 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7290 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7291 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7293 if (!sibcall && !TARGET_PA_20)
7295 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7296 if (TARGET_NO_SPACE_REGS)
7297 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7299 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7306 output_asm_insn ("bve (%%r1)", xoperands);
7311 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7312 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7313 delay_slot_filled = 1;
7316 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7321 if (!TARGET_NO_SPACE_REGS)
7322 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7327 if (TARGET_NO_SPACE_REGS)
7328 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7330 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7334 if (TARGET_NO_SPACE_REGS)
7335 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7337 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7340 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7342 output_asm_insn ("copy %%r31,%%r2", xoperands);
7343 delay_slot_filled = 1;
7350 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7351 output_asm_insn ("nop", xoperands);
7353 /* We are done if there isn't a jump in the delay slot. */
7355 || delay_insn_deleted
7356 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7359 /* A sibcall should never have a branch in the delay slot. */
7360 gcc_assert (!sibcall);
7362 /* This call has an unconditional jump in its delay slot. */
7363 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7365 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7367 /* See if the return address can be adjusted. Use the containing
7368 sequence insn's address. */
7369 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7370 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7371 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7373 if (VAL_14_BITS_P (distance))
7375 xoperands[1] = gen_label_rtx ();
7376 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7377 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7378 CODE_LABEL_NUMBER (xoperands[1]));
7381 output_asm_insn ("nop\n\tb,n %0", xoperands);
7384 output_asm_insn ("b,n %0", xoperands);
7386 /* Delete the jump. */
7387 PUT_CODE (NEXT_INSN (insn), NOTE);
7388 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7389 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7394 /* Return the attribute length of the indirect call instruction INSN.
7395 The length must match the code generated by output_indirect call.
7396 The returned length includes the delay slot. Currently, the delay
7397 slot of an indirect call sequence is not exposed and it is used by
7398 the sequence itself. */
7401 attr_length_indirect_call (rtx insn)
7403 unsigned long distance = -1;
7404 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7406 if (INSN_ADDRESSES_SET_P ())
7408 distance = (total + insn_current_reference_address (insn));
7409 if (distance < total)
7416 if (TARGET_FAST_INDIRECT_CALLS
7417 || (!TARGET_PORTABLE_RUNTIME
7418 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7424 if (TARGET_PORTABLE_RUNTIME)
7427 /* Out of reach, can use ble. */
7432 output_indirect_call (rtx insn, rtx call_dest)
7438 xoperands[0] = call_dest;
7439 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7440 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7444 /* First the special case for kernels, level 0 systems, etc. */
7445 if (TARGET_FAST_INDIRECT_CALLS)
7446 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7448 /* Now the normal case -- we can reach $$dyncall directly or
7449 we're sure that we can get there via a long-branch stub.
7451 No need to check target flags as the length uniquely identifies
7452 the remaining cases. */
7453 if (attr_length_indirect_call (insn) == 8)
7455 /* The HP linker substitutes a BLE for millicode calls using
7456 the short PIC PCREL form. Thus, we must use %r31 as the
7457 link register when generating PA 1.x code. */
7459 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7461 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7464 /* Long millicode call, but we are not generating PIC or portable runtime
7466 if (attr_length_indirect_call (insn) == 12)
7467 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7469 /* Long millicode call for portable runtime. */
7470 if (attr_length_indirect_call (insn) == 20)
7471 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7473 /* We need a long PIC call to $$dyncall. */
7474 xoperands[0] = NULL_RTX;
7475 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7476 if (TARGET_SOM || !TARGET_GAS)
7478 xoperands[0] = gen_label_rtx ();
7479 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7480 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7481 CODE_LABEL_NUMBER (xoperands[0]));
7482 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7486 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7487 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7490 output_asm_insn ("blr %%r0,%%r2", xoperands);
7491 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7495 /* Return the total length of the save and restore instructions needed for
7496 the data linkage table pointer (i.e., the PIC register) across the call
7497 instruction INSN. No-return calls do not require a save and restore.
7498 In addition, we may be able to avoid the save and restore for calls
7499 within the same translation unit. */
7502 attr_length_save_restore_dltp (rtx insn)
7504 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7510 /* In HPUX 8.0's shared library scheme, special relocations are needed
7511 for function labels if they might be passed to a function
7512 in a shared library (because shared libraries don't live in code
7513 space), and special magic is needed to construct their address. */
7516 hppa_encode_label (rtx sym)
7518 const char *str = XSTR (sym, 0);
7519 int len = strlen (str) + 1;
7522 p = newstr = alloca (len + 1);
7526 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7530 pa_encode_section_info (tree decl, rtx rtl, int first)
7532 default_encode_section_info (decl, rtl, first);
7534 if (first && TEXT_SPACE_P (decl))
7536 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7537 if (TREE_CODE (decl) == FUNCTION_DECL)
7538 hppa_encode_label (XEXP (rtl, 0));
7542 /* This is sort of inverse to pa_encode_section_info. */
7545 pa_strip_name_encoding (const char *str)
7547 str += (*str == '@');
7548 str += (*str == '*');
7553 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7555 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7558 /* Returns 1 if OP is a function label involved in a simple addition
7559 with a constant. Used to keep certain patterns from matching
7560 during instruction combination. */
7562 is_function_label_plus_const (rtx op)
7564 /* Strip off any CONST. */
7565 if (GET_CODE (op) == CONST)
7568 return (GET_CODE (op) == PLUS
7569 && function_label_operand (XEXP (op, 0), Pmode)
7570 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7573 /* Output assembly code for a thunk to FUNCTION. */
7576 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7577 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7580 static unsigned int current_thunk_number;
7581 int val_14 = VAL_14_BITS_P (delta);
7586 xoperands[0] = XEXP (DECL_RTL (function), 0);
7587 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7588 xoperands[2] = GEN_INT (delta);
7590 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7591 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7593 /* Output the thunk. We know that the function is in the same
7594 translation unit (i.e., the same space) as the thunk, and that
7595 thunks are output after their method. Thus, we don't need an
7596 external branch to reach the function. With SOM and GAS,
7597 functions and thunks are effectively in different sections.
7598 Thus, we can always use a IA-relative branch and the linker
7599 will add a long branch stub if necessary.
7601 However, we have to be careful when generating PIC code on the
7602 SOM port to ensure that the sequence does not transfer to an
7603 import stub for the target function as this could clobber the
7604 return value saved at SP-24. This would also apply to the
7605 32-bit linux port if the multi-space model is implemented. */
7606 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7607 && !(flag_pic && TREE_PUBLIC (function))
7608 && (TARGET_GAS || last_address < 262132))
7609 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7610 && ((targetm.have_named_sections
7611 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7612 /* The GNU 64-bit linker has rather poor stub management.
7613 So, we use a long branch from thunks that aren't in
7614 the same section as the target function. */
7616 && (DECL_SECTION_NAME (thunk_fndecl)
7617 != DECL_SECTION_NAME (function)))
7618 || ((DECL_SECTION_NAME (thunk_fndecl)
7619 == DECL_SECTION_NAME (function))
7620 && last_address < 262132)))
7621 || (!targetm.have_named_sections && last_address < 262132))))
7624 output_asm_insn ("addil L'%2,%%r26", xoperands);
7626 output_asm_insn ("b %0", xoperands);
7630 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7635 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7639 else if (TARGET_64BIT)
7641 /* We only have one call-clobbered scratch register, so we can't
7642 make use of the delay slot if delta doesn't fit in 14 bits. */
7645 output_asm_insn ("addil L'%2,%%r26", xoperands);
7646 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7649 output_asm_insn ("b,l .+8,%%r1", xoperands);
7653 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7654 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7658 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7659 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7664 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7665 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7670 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7674 else if (TARGET_PORTABLE_RUNTIME)
7676 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7677 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7680 output_asm_insn ("addil L'%2,%%r26", xoperands);
7682 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7686 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7691 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7695 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7697 /* The function is accessible from outside this module. The only
7698 way to avoid an import stub between the thunk and function is to
7699 call the function directly with an indirect sequence similar to
7700 that used by $$dyncall. This is possible because $$dyncall acts
7701 as the import stub in an indirect call. */
7702 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7703 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7704 output_asm_insn ("addil LT'%3,%%r19", xoperands);
7705 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7706 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7707 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7708 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7709 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7710 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7714 output_asm_insn ("addil L'%2,%%r26", xoperands);
7720 output_asm_insn ("bve (%%r22)", xoperands);
7723 else if (TARGET_NO_SPACE_REGS)
7725 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
7730 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7731 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7732 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
7737 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7739 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7743 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7745 if (TARGET_SOM || !TARGET_GAS)
7747 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
7748 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
7752 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7753 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
7757 output_asm_insn ("addil L'%2,%%r26", xoperands);
7759 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7763 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7768 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7775 output_asm_insn ("addil L'%2,%%r26", xoperands);
7777 output_asm_insn ("ldil L'%0,%%r22", xoperands);
7778 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
7782 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7787 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7792 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7794 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7797 output_asm_insn (".align 4", xoperands);
7798 ASM_OUTPUT_LABEL (file, label);
7799 output_asm_insn (".word P'%0", xoperands);
7801 else if (TARGET_SOM && TARGET_GAS)
7804 current_thunk_number++;
7805 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7806 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7807 last_address += nbytes;
7808 update_total_code_bytes (nbytes);
7811 /* Only direct calls to static functions are allowed to be sibling (tail)
7814 This restriction is necessary because some linker generated stubs will
7815 store return pointers into rp' in some cases which might clobber a
7816 live value already in rp'.
7818 In a sibcall the current function and the target function share stack
7819 space. Thus if the path to the current function and the path to the
7820 target function save a value in rp', they save the value into the
7821 same stack slot, which has undesirable consequences.
7823 Because of the deferred binding nature of shared libraries any function
7824 with external scope could be in a different load module and thus require
7825 rp' to be saved when calling that function. So sibcall optimizations
7826 can only be safe for static function.
7828 Note that GCC never needs return value relocations, so we don't have to
7829 worry about static calls with return value relocations (which require
7832 It is safe to perform a sibcall optimization when the target function
7833 will never return. */
7835 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7837 if (TARGET_PORTABLE_RUNTIME)
7840 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
7841 single subspace mode and the call is not indirect. As far as I know,
7842 there is no operating system support for the multiple subspace mode.
7843 It might be possible to support indirect calls if we didn't use
7844 $$dyncall (see the indirect sequence generated in output_call). */
7846 return (decl != NULL_TREE);
7848 /* Sibcalls are not ok because the arg pointer register is not a fixed
7849 register. This prevents the sibcall optimization from occurring. In
7850 addition, there are problems with stub placement using GNU ld. This
7851 is because a normal sibcall branch uses a 17-bit relocation while
7852 a regular call branch uses a 22-bit relocation. As a result, more
7853 care needs to be taken in the placement of long-branch stubs. */
7857 /* Sibcalls are only ok within a translation unit. */
7858 return (decl && !TREE_PUBLIC (decl));
7861 /* ??? Addition is not commutative on the PA due to the weird implicit
7862 space register selection rules for memory addresses. Therefore, we
7863 don't consider a + b == b + a, as this might be inside a MEM. */
7865 pa_commutative_p (rtx x, int outer_code)
7867 return (COMMUTATIVE_P (x)
7868 && (TARGET_NO_SPACE_REGS
7869 || (outer_code != UNKNOWN && outer_code != MEM)
7870 || GET_CODE (x) != PLUS));
7873 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7874 use in fmpyadd instructions. */
7876 fmpyaddoperands (rtx *operands)
7878 enum machine_mode mode = GET_MODE (operands[0]);
7880 /* Must be a floating point mode. */
7881 if (mode != SFmode && mode != DFmode)
7884 /* All modes must be the same. */
7885 if (! (mode == GET_MODE (operands[1])
7886 && mode == GET_MODE (operands[2])
7887 && mode == GET_MODE (operands[3])
7888 && mode == GET_MODE (operands[4])
7889 && mode == GET_MODE (operands[5])))
7892 /* All operands must be registers. */
7893 if (! (GET_CODE (operands[1]) == REG
7894 && GET_CODE (operands[2]) == REG
7895 && GET_CODE (operands[3]) == REG
7896 && GET_CODE (operands[4]) == REG
7897 && GET_CODE (operands[5]) == REG))
7900 /* Only 2 real operands to the addition. One of the input operands must
7901 be the same as the output operand. */
7902 if (! rtx_equal_p (operands[3], operands[4])
7903 && ! rtx_equal_p (operands[3], operands[5]))
7906 /* Inout operand of add cannot conflict with any operands from multiply. */
7907 if (rtx_equal_p (operands[3], operands[0])
7908 || rtx_equal_p (operands[3], operands[1])
7909 || rtx_equal_p (operands[3], operands[2]))
7912 /* multiply cannot feed into addition operands. */
7913 if (rtx_equal_p (operands[4], operands[0])
7914 || rtx_equal_p (operands[5], operands[0]))
7917 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
7919 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
7920 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
7921 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
7922 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
7923 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
7924 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
7927 /* Passed. Operands are suitable for fmpyadd. */
7931 #if !defined(USE_COLLECT2)
7933 pa_asm_out_constructor (rtx symbol, int priority)
7935 if (!function_label_operand (symbol, VOIDmode))
7936 hppa_encode_label (symbol);
7938 #ifdef CTORS_SECTION_ASM_OP
7939 default_ctor_section_asm_out_constructor (symbol, priority);
7941 # ifdef TARGET_ASM_NAMED_SECTION
7942 default_named_section_asm_out_constructor (symbol, priority);
7944 default_stabs_asm_out_constructor (symbol, priority);
7950 pa_asm_out_destructor (rtx symbol, int priority)
7952 if (!function_label_operand (symbol, VOIDmode))
7953 hppa_encode_label (symbol);
7955 #ifdef DTORS_SECTION_ASM_OP
7956 default_dtor_section_asm_out_destructor (symbol, priority);
7958 # ifdef TARGET_ASM_NAMED_SECTION
7959 default_named_section_asm_out_destructor (symbol, priority);
7961 default_stabs_asm_out_destructor (symbol, priority);
7967 /* This function places uninitialized global data in the bss section.
7968 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
7969 function on the SOM port to prevent uninitialized global data from
7970 being placed in the data section. */
7973 pa_asm_output_aligned_bss (FILE *stream,
7975 unsigned HOST_WIDE_INT size,
7979 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
7981 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
7982 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
7985 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
7986 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
7989 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
7990 ASM_OUTPUT_LABEL (stream, name);
7991 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
7994 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
7995 that doesn't allow the alignment of global common storage to be directly
7996 specified. The SOM linker aligns common storage based on the rounded
7997 value of the NUM_BYTES parameter in the .comm directive. It's not
7998 possible to use the .align directive as it doesn't affect the alignment
7999 of the label associated with a .comm directive. */
8002 pa_asm_output_aligned_common (FILE *stream,
8004 unsigned HOST_WIDE_INT size,
8007 unsigned int max_common_align;
8009 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8010 if (align > max_common_align)
8012 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8013 "for global common data. Using %u",
8014 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8015 align = max_common_align;
8020 assemble_name (stream, name);
8021 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8022 MAX (size, align / BITS_PER_UNIT));
8025 /* We can't use .comm for local common storage as the SOM linker effectively
8026 treats the symbol as universal and uses the same storage for local symbols
8027 with the same name in different object files. The .block directive
8028 reserves an uninitialized block of storage. However, it's not common
8029 storage. Fortunately, GCC never requests common storage with the same
8030 name in any given translation unit. */
8033 pa_asm_output_aligned_local (FILE *stream,
8035 unsigned HOST_WIDE_INT size,
8039 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8042 fprintf (stream, "%s", LOCAL_ASM_OP);
8043 assemble_name (stream, name);
8044 fprintf (stream, "\n");
8047 ASM_OUTPUT_LABEL (stream, name);
8048 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8051 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8052 use in fmpysub instructions. */
8054 fmpysuboperands (rtx *operands)
8056 enum machine_mode mode = GET_MODE (operands[0]);
8058 /* Must be a floating point mode. */
8059 if (mode != SFmode && mode != DFmode)
8062 /* All modes must be the same. */
8063 if (! (mode == GET_MODE (operands[1])
8064 && mode == GET_MODE (operands[2])
8065 && mode == GET_MODE (operands[3])
8066 && mode == GET_MODE (operands[4])
8067 && mode == GET_MODE (operands[5])))
8070 /* All operands must be registers. */
8071 if (! (GET_CODE (operands[1]) == REG
8072 && GET_CODE (operands[2]) == REG
8073 && GET_CODE (operands[3]) == REG
8074 && GET_CODE (operands[4]) == REG
8075 && GET_CODE (operands[5]) == REG))
8078 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8079 operation, so operands[4] must be the same as operand[3]. */
8080 if (! rtx_equal_p (operands[3], operands[4]))
8083 /* multiply cannot feed into subtraction. */
8084 if (rtx_equal_p (operands[5], operands[0]))
8087 /* Inout operand of sub cannot conflict with any operands from multiply. */
8088 if (rtx_equal_p (operands[3], operands[0])
8089 || rtx_equal_p (operands[3], operands[1])
8090 || rtx_equal_p (operands[3], operands[2]))
8093 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8095 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8096 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8097 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8098 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8099 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8100 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8103 /* Passed. Operands are suitable for fmpysub. */
8107 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8108 constants for shadd instructions. */
8110 shadd_constant_p (int val)
8112 if (val == 2 || val == 4 || val == 8)
8118 /* Return 1 if OP is valid as a base or index register in a
8122 borx_reg_operand (rtx op, enum machine_mode mode)
8124 if (GET_CODE (op) != REG)
8127 /* We must reject virtual registers as the only expressions that
8128 can be instantiated are REG and REG+CONST. */
8129 if (op == virtual_incoming_args_rtx
8130 || op == virtual_stack_vars_rtx
8131 || op == virtual_stack_dynamic_rtx
8132 || op == virtual_outgoing_args_rtx
8133 || op == virtual_cfa_rtx)
8136 /* While it's always safe to index off the frame pointer, it's not
8137 profitable to do so when the frame pointer is being eliminated. */
8138 if (!reload_completed
8139 && flag_omit_frame_pointer
8140 && !current_function_calls_alloca
8141 && op == frame_pointer_rtx)
8144 return register_operand (op, mode);
8147 /* Return 1 if this operand is anything other than a hard register. */
8150 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8152 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8155 /* Return 1 if INSN branches forward. Should be using insn_addresses
8156 to avoid walking through all the insns... */
8158 forward_branch_p (rtx insn)
8160 rtx label = JUMP_LABEL (insn);
8167 insn = NEXT_INSN (insn);
8170 return (insn == label);
8173 /* Return 1 if OP is an equality comparison, else return 0. */
8175 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8177 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8180 /* Return 1 if INSN is in the delay slot of a call instruction. */
8182 jump_in_call_delay (rtx insn)
8185 if (GET_CODE (insn) != JUMP_INSN)
8188 if (PREV_INSN (insn)
8189 && PREV_INSN (PREV_INSN (insn))
8190 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8192 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8194 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8195 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8202 /* Output an unconditional move and branch insn. */
8205 output_parallel_movb (rtx *operands, int length)
8207 /* These are the cases in which we win. */
8209 return "mov%I1b,tr %1,%0,%2";
8211 /* None of these cases wins, but they don't lose either. */
8212 if (dbr_sequence_length () == 0)
8214 /* Nothing in the delay slot, fake it by putting the combined
8215 insn (the copy or add) in the delay slot of a bl. */
8216 if (GET_CODE (operands[1]) == CONST_INT)
8217 return "b %2\n\tldi %1,%0";
8219 return "b %2\n\tcopy %1,%0";
8223 /* Something in the delay slot, but we've got a long branch. */
8224 if (GET_CODE (operands[1]) == CONST_INT)
8225 return "ldi %1,%0\n\tb %2";
8227 return "copy %1,%0\n\tb %2";
8231 /* Output an unconditional add and branch insn. */
8234 output_parallel_addb (rtx *operands, int length)
8236 /* To make life easy we want operand0 to be the shared input/output
8237 operand and operand1 to be the readonly operand. */
8238 if (operands[0] == operands[1])
8239 operands[1] = operands[2];
8241 /* These are the cases in which we win. */
8243 return "add%I1b,tr %1,%0,%3";
8245 /* None of these cases win, but they don't lose either. */
8246 if (dbr_sequence_length () == 0)
8248 /* Nothing in the delay slot, fake it by putting the combined
8249 insn (the copy or add) in the delay slot of a bl. */
8250 return "b %3\n\tadd%I1 %1,%0,%0";
8254 /* Something in the delay slot, but we've got a long branch. */
8255 return "add%I1 %1,%0,%0\n\tb %3";
8259 /* Return nonzero if INSN (a jump insn) immediately follows a call
8260 to a named function. This is used to avoid filling the delay slot
8261 of the jump since it can usually be eliminated by modifying RP in
8262 the delay slot of the call. */
8265 following_call (rtx insn)
8267 if (! TARGET_JUMP_IN_DELAY)
8270 /* Find the previous real insn, skipping NOTEs. */
8271 insn = PREV_INSN (insn);
8272 while (insn && GET_CODE (insn) == NOTE)
8273 insn = PREV_INSN (insn);
8275 /* Check for CALL_INSNs and millicode calls. */
8277 && ((GET_CODE (insn) == CALL_INSN
8278 && get_attr_type (insn) != TYPE_DYNCALL)
8279 || (GET_CODE (insn) == INSN
8280 && GET_CODE (PATTERN (insn)) != SEQUENCE
8281 && GET_CODE (PATTERN (insn)) != USE
8282 && GET_CODE (PATTERN (insn)) != CLOBBER
8283 && get_attr_type (insn) == TYPE_MILLI)))
8289 /* We use this hook to perform a PA specific optimization which is difficult
8290 to do in earlier passes.
8292 We want the delay slots of branches within jump tables to be filled.
8293 None of the compiler passes at the moment even has the notion that a
8294 PA jump table doesn't contain addresses, but instead contains actual
8297 Because we actually jump into the table, the addresses of each entry
8298 must stay constant in relation to the beginning of the table (which
8299 itself must stay constant relative to the instruction to jump into
8300 it). I don't believe we can guarantee earlier passes of the compiler
8301 will adhere to those rules.
8303 So, late in the compilation process we find all the jump tables, and
8304 expand them into real code -- e.g. each entry in the jump table vector
8305 will get an appropriate label followed by a jump to the final target.
8307 Reorg and the final jump pass can then optimize these branches and
8308 fill their delay slots. We end up with smaller, more efficient code.
8310 The jump instructions within the table are special; we must be able
8311 to identify them during assembly output (if the jumps don't get filled
8312 we need to emit a nop rather than nullifying the delay slot)). We
8313 identify jumps in switch tables by using insns with the attribute
8314 type TYPE_BTABLE_BRANCH.
8316 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8317 insns. This serves two purposes, first it prevents jump.c from
8318 noticing that the last N entries in the table jump to the instruction
8319 immediately after the table and deleting the jumps. Second, those
8320 insns mark where we should emit .begin_brtab and .end_brtab directives
8321 when using GAS (allows for better link time optimizations). */
8328 remove_useless_addtr_insns (1);
8330 if (pa_cpu < PROCESSOR_8000)
8331 pa_combine_instructions ();
8334 /* This is fairly cheap, so always run it if optimizing. */
8335 if (optimize > 0 && !TARGET_BIG_SWITCH)
8337 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8338 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8340 rtx pattern, tmp, location, label;
8341 unsigned int length, i;
8343 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8344 if (GET_CODE (insn) != JUMP_INSN
8345 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8346 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8349 /* Emit marker for the beginning of the branch table. */
8350 emit_insn_before (gen_begin_brtab (), insn);
8352 pattern = PATTERN (insn);
8353 location = PREV_INSN (insn);
8354 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8356 for (i = 0; i < length; i++)
8358 /* Emit a label before each jump to keep jump.c from
8359 removing this code. */
8360 tmp = gen_label_rtx ();
8361 LABEL_NUSES (tmp) = 1;
8362 emit_label_after (tmp, location);
8363 location = NEXT_INSN (location);
8365 if (GET_CODE (pattern) == ADDR_VEC)
8366 label = XEXP (XVECEXP (pattern, 0, i), 0);
8368 label = XEXP (XVECEXP (pattern, 1, i), 0);
8370 tmp = gen_short_jump (label);
8372 /* Emit the jump itself. */
8373 tmp = emit_jump_insn_after (tmp, location);
8374 JUMP_LABEL (tmp) = label;
8375 LABEL_NUSES (label)++;
8376 location = NEXT_INSN (location);
8378 /* Emit a BARRIER after the jump. */
8379 emit_barrier_after (location);
8380 location = NEXT_INSN (location);
8383 /* Emit marker for the end of the branch table. */
8384 emit_insn_before (gen_end_brtab (), location);
8385 location = NEXT_INSN (location);
8386 emit_barrier_after (location);
8388 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8394 /* Still need brtab marker insns. FIXME: the presence of these
8395 markers disables output of the branch table to readonly memory,
8396 and any alignment directives that might be needed. Possibly,
8397 the begin_brtab insn should be output before the label for the
8398 table. This doesn't matter at the moment since the tables are
8399 always output in the text section. */
8400 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8402 /* Find an ADDR_VEC insn. */
8403 if (GET_CODE (insn) != JUMP_INSN
8404 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8405 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8408 /* Now generate markers for the beginning and end of the
8410 emit_insn_before (gen_begin_brtab (), insn);
8411 emit_insn_after (gen_end_brtab (), insn);
8416 /* The PA has a number of odd instructions which can perform multiple
8417 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8418 it may be profitable to combine two instructions into one instruction
8419 with two outputs. It's not profitable PA2.0 machines because the
8420 two outputs would take two slots in the reorder buffers.
8422 This routine finds instructions which can be combined and combines
8423 them. We only support some of the potential combinations, and we
8424 only try common ways to find suitable instructions.
8426 * addb can add two registers or a register and a small integer
8427 and jump to a nearby (+-8k) location. Normally the jump to the
8428 nearby location is conditional on the result of the add, but by
8429 using the "true" condition we can make the jump unconditional.
8430 Thus addb can perform two independent operations in one insn.
8432 * movb is similar to addb in that it can perform a reg->reg
8433 or small immediate->reg copy and jump to a nearby (+-8k location).
8435 * fmpyadd and fmpysub can perform a FP multiply and either an
8436 FP add or FP sub if the operands of the multiply and add/sub are
8437 independent (there are other minor restrictions). Note both
8438 the fmpy and fadd/fsub can in theory move to better spots according
8439 to data dependencies, but for now we require the fmpy stay at a
8442 * Many of the memory operations can perform pre & post updates
8443 of index registers. GCC's pre/post increment/decrement addressing
8444 is far too simple to take advantage of all the possibilities. This
8445 pass may not be suitable since those insns may not be independent.
8447 * comclr can compare two ints or an int and a register, nullify
8448 the following instruction and zero some other register. This
8449 is more difficult to use as it's harder to find an insn which
8450 will generate a comclr than finding something like an unconditional
8451 branch. (conditional moves & long branches create comclr insns).
8453 * Most arithmetic operations can conditionally skip the next
8454 instruction. They can be viewed as "perform this operation
8455 and conditionally jump to this nearby location" (where nearby
8456 is an insns away). These are difficult to use due to the
8457 branch length restrictions. */
8460 pa_combine_instructions (void)
8464 /* This can get expensive since the basic algorithm is on the
8465 order of O(n^2) (or worse). Only do it for -O2 or higher
8466 levels of optimization. */
8470 /* Walk down the list of insns looking for "anchor" insns which
8471 may be combined with "floating" insns. As the name implies,
8472 "anchor" instructions don't move, while "floating" insns may
8474 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8475 new = make_insn_raw (new);
8477 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8479 enum attr_pa_combine_type anchor_attr;
8480 enum attr_pa_combine_type floater_attr;
8482 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8483 Also ignore any special USE insns. */
8484 if ((GET_CODE (anchor) != INSN
8485 && GET_CODE (anchor) != JUMP_INSN
8486 && GET_CODE (anchor) != CALL_INSN)
8487 || GET_CODE (PATTERN (anchor)) == USE
8488 || GET_CODE (PATTERN (anchor)) == CLOBBER
8489 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8490 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8493 anchor_attr = get_attr_pa_combine_type (anchor);
8494 /* See if anchor is an insn suitable for combination. */
8495 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8496 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8497 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8498 && ! forward_branch_p (anchor)))
8502 for (floater = PREV_INSN (anchor);
8504 floater = PREV_INSN (floater))
8506 if (GET_CODE (floater) == NOTE
8507 || (GET_CODE (floater) == INSN
8508 && (GET_CODE (PATTERN (floater)) == USE
8509 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8512 /* Anything except a regular INSN will stop our search. */
8513 if (GET_CODE (floater) != INSN
8514 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8515 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8521 /* See if FLOATER is suitable for combination with the
8523 floater_attr = get_attr_pa_combine_type (floater);
8524 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8525 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8526 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8527 && floater_attr == PA_COMBINE_TYPE_FMPY))
8529 /* If ANCHOR and FLOATER can be combined, then we're
8530 done with this pass. */
8531 if (pa_can_combine_p (new, anchor, floater, 0,
8532 SET_DEST (PATTERN (floater)),
8533 XEXP (SET_SRC (PATTERN (floater)), 0),
8534 XEXP (SET_SRC (PATTERN (floater)), 1)))
8538 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8539 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8541 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8543 if (pa_can_combine_p (new, anchor, floater, 0,
8544 SET_DEST (PATTERN (floater)),
8545 XEXP (SET_SRC (PATTERN (floater)), 0),
8546 XEXP (SET_SRC (PATTERN (floater)), 1)))
8551 if (pa_can_combine_p (new, anchor, floater, 0,
8552 SET_DEST (PATTERN (floater)),
8553 SET_SRC (PATTERN (floater)),
8554 SET_SRC (PATTERN (floater))))
8560 /* If we didn't find anything on the backwards scan try forwards. */
8562 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8563 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8565 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8567 if (GET_CODE (floater) == NOTE
8568 || (GET_CODE (floater) == INSN
8569 && (GET_CODE (PATTERN (floater)) == USE
8570 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8574 /* Anything except a regular INSN will stop our search. */
8575 if (GET_CODE (floater) != INSN
8576 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8577 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8583 /* See if FLOATER is suitable for combination with the
8585 floater_attr = get_attr_pa_combine_type (floater);
8586 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8587 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8588 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8589 && floater_attr == PA_COMBINE_TYPE_FMPY))
8591 /* If ANCHOR and FLOATER can be combined, then we're
8592 done with this pass. */
8593 if (pa_can_combine_p (new, anchor, floater, 1,
8594 SET_DEST (PATTERN (floater)),
8595 XEXP (SET_SRC (PATTERN (floater)),
8597 XEXP (SET_SRC (PATTERN (floater)),
8604 /* FLOATER will be nonzero if we found a suitable floating
8605 insn for combination with ANCHOR. */
8607 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8608 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8610 /* Emit the new instruction and delete the old anchor. */
8611 emit_insn_before (gen_rtx_PARALLEL
8613 gen_rtvec (2, PATTERN (anchor),
8614 PATTERN (floater))),
8617 PUT_CODE (anchor, NOTE);
8618 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8619 NOTE_SOURCE_FILE (anchor) = 0;
8621 /* Emit a special USE insn for FLOATER, then delete
8622 the floating insn. */
8623 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8624 delete_insn (floater);
8629 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8632 /* Emit the new_jump instruction and delete the old anchor. */
8634 = emit_jump_insn_before (gen_rtx_PARALLEL
8636 gen_rtvec (2, PATTERN (anchor),
8637 PATTERN (floater))),
8640 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8641 PUT_CODE (anchor, NOTE);
8642 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8643 NOTE_SOURCE_FILE (anchor) = 0;
8645 /* Emit a special USE insn for FLOATER, then delete
8646 the floating insn. */
8647 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8648 delete_insn (floater);
8656 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8659 int insn_code_number;
8662 /* Create a PARALLEL with the patterns of ANCHOR and
8663 FLOATER, try to recognize it, then test constraints
8664 for the resulting pattern.
8666 If the pattern doesn't match or the constraints
8667 aren't met keep searching for a suitable floater
8669 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8670 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8671 INSN_CODE (new) = -1;
8672 insn_code_number = recog_memoized (new);
8673 if (insn_code_number < 0
8674 || (extract_insn (new), ! constrain_operands (1)))
8688 /* There's up to three operands to consider. One
8689 output and two inputs.
8691 The output must not be used between FLOATER & ANCHOR
8692 exclusive. The inputs must not be set between
8693 FLOATER and ANCHOR exclusive. */
8695 if (reg_used_between_p (dest, start, end))
8698 if (reg_set_between_p (src1, start, end))
8701 if (reg_set_between_p (src2, start, end))
8704 /* If we get here, then everything is good. */
8708 /* Return nonzero if references for INSN are delayed.
8710 Millicode insns are actually function calls with some special
8711 constraints on arguments and register usage.
8713 Millicode calls always expect their arguments in the integer argument
8714 registers, and always return their result in %r29 (ret1). They
8715 are expected to clobber their arguments, %r1, %r29, and the return
8716 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8718 This function tells reorg that the references to arguments and
8719 millicode calls do not appear to happen until after the millicode call.
8720 This allows reorg to put insns which set the argument registers into the
8721 delay slot of the millicode call -- thus they act more like traditional
8724 Note we cannot consider side effects of the insn to be delayed because
8725 the branch and link insn will clobber the return pointer. If we happened
8726 to use the return pointer in the delay slot of the call, then we lose.
8728 get_attr_type will try to recognize the given insn, so make sure to
8729 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8732 insn_refs_are_delayed (rtx insn)
8734 return ((GET_CODE (insn) == INSN
8735 && GET_CODE (PATTERN (insn)) != SEQUENCE
8736 && GET_CODE (PATTERN (insn)) != USE
8737 && GET_CODE (PATTERN (insn)) != CLOBBER
8738 && get_attr_type (insn) == TYPE_MILLI));
8741 /* On the HP-PA the value is found in register(s) 28(-29), unless
8742 the mode is SF or DF. Then the value is returned in fr4 (32).
8744 This must perform the same promotions as PROMOTE_MODE, else
8745 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8747 Small structures must be returned in a PARALLEL on PA64 in order
8748 to match the HP Compiler ABI. */
8751 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8753 enum machine_mode valmode;
8755 if (AGGREGATE_TYPE_P (valtype))
8759 /* Aggregates with a size less than or equal to 128 bits are
8760 returned in GR 28(-29). They are left justified. The pad
8761 bits are undefined. Larger aggregates are returned in
8765 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8767 for (i = 0; i < ub; i++)
8769 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8770 gen_rtx_REG (DImode, 28 + i),
8775 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8777 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
8779 /* Aggregates 5 to 8 bytes in size are returned in general
8780 registers r28-r29 in the same manner as other non
8781 floating-point objects. The data is right-justified and
8782 zero-extended to 64 bits. This is opposite to the normal
8783 justification used on big endian targets and requires
8784 special treatment. */
8785 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8786 gen_rtx_REG (DImode, 28), const0_rtx);
8787 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
8791 if ((INTEGRAL_TYPE_P (valtype)
8792 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8793 || POINTER_TYPE_P (valtype))
8794 valmode = word_mode;
8796 valmode = TYPE_MODE (valtype);
8798 if (TREE_CODE (valtype) == REAL_TYPE
8799 && !AGGREGATE_TYPE_P (valtype)
8800 && TYPE_MODE (valtype) != TFmode
8801 && !TARGET_SOFT_FLOAT)
8802 return gen_rtx_REG (valmode, 32);
8804 return gen_rtx_REG (valmode, 28);
8807 /* Return the location of a parameter that is passed in a register or NULL
8808 if the parameter has any component that is passed in memory.
8810 This is new code and will be pushed to into the net sources after
8813 ??? We might want to restructure this so that it looks more like other
8816 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8817 int named ATTRIBUTE_UNUSED)
8819 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8826 if (mode == VOIDmode)
8829 arg_size = FUNCTION_ARG_SIZE (mode, type);
8831 /* If this arg would be passed partially or totally on the stack, then
8832 this routine should return zero. pa_arg_partial_bytes will
8833 handle arguments which are split between regs and stack slots if
8834 the ABI mandates split arguments. */
8837 /* The 32-bit ABI does not split arguments. */
8838 if (cum->words + arg_size > max_arg_words)
8844 alignment = cum->words & 1;
8845 if (cum->words + alignment >= max_arg_words)
8849 /* The 32bit ABIs and the 64bit ABIs are rather different,
8850 particularly in their handling of FP registers. We might
8851 be able to cleverly share code between them, but I'm not
8852 going to bother in the hope that splitting them up results
8853 in code that is more easily understood. */
8857 /* Advance the base registers to their current locations.
8859 Remember, gprs grow towards smaller register numbers while
8860 fprs grow to higher register numbers. Also remember that
8861 although FP regs are 32-bit addressable, we pretend that
8862 the registers are 64-bits wide. */
8863 gpr_reg_base = 26 - cum->words;
8864 fpr_reg_base = 32 + cum->words;
8866 /* Arguments wider than one word and small aggregates need special
8870 || (type && AGGREGATE_TYPE_P (type)))
8872 /* Double-extended precision (80-bit), quad-precision (128-bit)
8873 and aggregates including complex numbers are aligned on
8874 128-bit boundaries. The first eight 64-bit argument slots
8875 are associated one-to-one, with general registers r26
8876 through r19, and also with floating-point registers fr4
8877 through fr11. Arguments larger than one word are always
8878 passed in general registers.
8880 Using a PARALLEL with a word mode register results in left
8881 justified data on a big-endian target. */
8884 int i, offset = 0, ub = arg_size;
8886 /* Align the base register. */
8887 gpr_reg_base -= alignment;
8889 ub = MIN (ub, max_arg_words - cum->words - alignment);
8890 for (i = 0; i < ub; i++)
8892 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8893 gen_rtx_REG (DImode, gpr_reg_base),
8899 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8904 /* If the argument is larger than a word, then we know precisely
8905 which registers we must use. */
8919 /* Structures 5 to 8 bytes in size are passed in the general
8920 registers in the same manner as other non floating-point
8921 objects. The data is right-justified and zero-extended
8922 to 64 bits. This is opposite to the normal justification
8923 used on big endian targets and requires special treatment.
8924 We now define BLOCK_REG_PADDING to pad these objects. */
8925 if (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
8927 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8928 gen_rtx_REG (DImode, gpr_reg_base),
8930 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
8935 /* We have a single word (32 bits). A simple computation
8936 will get us the register #s we need. */
8937 gpr_reg_base = 26 - cum->words;
8938 fpr_reg_base = 32 + 2 * cum->words;
8942 /* Determine if the argument needs to be passed in both general and
8943 floating point registers. */
8944 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8945 /* If we are doing soft-float with portable runtime, then there
8946 is no need to worry about FP regs. */
8947 && !TARGET_SOFT_FLOAT
8948 /* The parameter must be some kind of float, else we can just
8949 pass it in integer registers. */
8950 && FLOAT_MODE_P (mode)
8951 /* The target function must not have a prototype. */
8952 && cum->nargs_prototype <= 0
8953 /* libcalls do not need to pass items in both FP and general
8955 && type != NULL_TREE
8956 /* All this hair applies to "outgoing" args only. This includes
8957 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
8959 /* Also pass outgoing floating arguments in both registers in indirect
8960 calls with the 32 bit ABI and the HP assembler since there is no
8961 way to the specify argument locations in static functions. */
8966 && FLOAT_MODE_P (mode)))
8972 gen_rtx_EXPR_LIST (VOIDmode,
8973 gen_rtx_REG (mode, fpr_reg_base),
8975 gen_rtx_EXPR_LIST (VOIDmode,
8976 gen_rtx_REG (mode, gpr_reg_base),
8981 /* See if we should pass this parameter in a general register. */
8982 if (TARGET_SOFT_FLOAT
8983 /* Indirect calls in the normal 32bit ABI require all arguments
8984 to be passed in general registers. */
8985 || (!TARGET_PORTABLE_RUNTIME
8989 /* If the parameter is not a floating point parameter, then
8990 it belongs in GPRs. */
8991 || !FLOAT_MODE_P (mode)
8992 /* Structure with single SFmode field belongs in GPR. */
8993 || (type && AGGREGATE_TYPE_P (type)))
8994 retval = gen_rtx_REG (mode, gpr_reg_base);
8996 retval = gen_rtx_REG (mode, fpr_reg_base);
9002 /* If this arg would be passed totally in registers or totally on the stack,
9003 then this routine should return zero. */
9006 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9007 tree type, bool named ATTRIBUTE_UNUSED)
9009 unsigned int max_arg_words = 8;
9010 unsigned int offset = 0;
9015 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9018 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9019 /* Arg fits fully into registers. */
9021 else if (cum->words + offset >= max_arg_words)
9022 /* Arg fully on the stack. */
9026 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9030 /* Return a string to output before text in the current function.
9032 This function is only used with SOM. Because we don't support
9033 named subspaces, we can only create a new subspace or switch back
9034 to the default text subspace. */
9036 som_text_section_asm_op (void)
9043 if (cfun && !cfun->machine->in_nsubspa)
9045 /* We only want to emit a .nsubspa directive once at the
9046 start of the function. */
9047 cfun->machine->in_nsubspa = 1;
9049 /* Create a new subspace for the text. This provides
9050 better stub placement and one-only functions. */
9052 && DECL_ONE_ONLY (cfun->decl)
9053 && !DECL_WEAK (cfun->decl))
9055 "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=24,COMDAT";
9057 return "\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$";
9061 /* There isn't a current function or the body of the current
9062 function has been completed. So, we are changing to the
9063 text section to output debugging information. Do this in
9064 the default text section. We need to forget that we are
9065 in the text section so that the function text_section in
9066 varasm.c will call us the next time around. */
9071 return "\t.SPACE $TEXT$\n\t.SUBSPA $CODE$";
9074 /* On hpux10, the linker will give an error if we have a reference
9075 in the read-only data section to a symbol defined in a shared
9076 library. Therefore, expressions that might require a reloc can
9077 not be placed in the read-only data section. */
9080 pa_select_section (tree exp, int reloc,
9081 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9083 if (TREE_CODE (exp) == VAR_DECL
9084 && TREE_READONLY (exp)
9085 && !TREE_THIS_VOLATILE (exp)
9086 && DECL_INITIAL (exp)
9087 && (DECL_INITIAL (exp) == error_mark_node
9088 || TREE_CONSTANT (DECL_INITIAL (exp)))
9092 && DECL_ONE_ONLY (exp)
9093 && !DECL_WEAK (exp))
9094 som_one_only_readonly_data_section ();
9096 readonly_data_section ();
9098 else if (CONSTANT_CLASS_P (exp) && !reloc)
9099 readonly_data_section ();
9101 && TREE_CODE (exp) == VAR_DECL
9102 && DECL_ONE_ONLY (exp)
9103 && !DECL_WEAK (exp))
9104 som_one_only_data_section ();
9110 pa_globalize_label (FILE *stream, const char *name)
9112 /* We only handle DATA objects here, functions are globalized in
9113 ASM_DECLARE_FUNCTION_NAME. */
9114 if (! FUNCTION_NAME_P (name))
9116 fputs ("\t.EXPORT ", stream);
9117 assemble_name (stream, name);
9118 fputs (",DATA\n", stream);
9122 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9125 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9126 int incoming ATTRIBUTE_UNUSED)
9128 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9131 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9134 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9136 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9137 PA64 ABI says that objects larger than 128 bits are returned in memory.
9138 Note, int_size_in_bytes can return -1 if the size of the object is
9139 variable or larger than the maximum value that can be expressed as
9140 a HOST_WIDE_INT. It can also return zero for an empty type. The
9141 simplest way to handle variable and empty types is to pass them in
9142 memory. This avoids problems in defining the boundaries of argument
9143 slots, allocating registers, etc. */
9144 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9145 || int_size_in_bytes (type) <= 0);
9148 /* Structure to hold declaration and name of external symbols that are
9149 emitted by GCC. We generate a vector of these symbols and output them
9150 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9151 This avoids putting out names that are never really used. */
9153 typedef struct extern_symbol GTY(())
9159 /* Define gc'd vector type for extern_symbol. */
9160 DEF_VEC_O(extern_symbol);
9161 DEF_VEC_ALLOC_O(extern_symbol,gc);
9163 /* Vector of extern_symbol pointers. */
9164 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9166 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9167 /* Mark DECL (name NAME) as an external reference (assembler output
9168 file FILE). This saves the names to output at the end of the file
9169 if actually referenced. */
9172 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9174 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9176 gcc_assert (file == asm_out_file);
9181 /* Output text required at the end of an assembler file.
9182 This includes deferred plabels and .import directives for
9183 all external symbols that were actually referenced. */
9186 pa_hpux_file_end (void)
9191 if (!NO_DEFERRED_PROFILE_COUNTERS)
9192 output_deferred_profile_counters ();
9194 output_deferred_plabels ();
9196 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9198 tree decl = p->decl;
9200 if (!TREE_ASM_WRITTEN (decl)
9201 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9202 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9205 VEC_free (extern_symbol, gc, extern_symbols);