1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "integrate.h"
49 #include "target-def.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69 set = single_set (out_insn);
73 other_mode = GET_MODE (SET_SRC (set));
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
82 #else
83 #define DO_FRAME_NOTES 0
84 #endif
85 #endif
87 static void copy_reg_pointer (rtx, rtx);
88 static void fix_range (const char *);
89 static bool pa_handle_option (size_t, const char *, int);
90 static int hppa_address_cost (rtx);
91 static bool hppa_rtx_costs (rtx, int, int, int *);
92 static inline rtx force_mode (enum machine_mode, rtx);
93 static void pa_reorg (void);
94 static void pa_combine_instructions (void);
95 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
96 static int forward_branch_p (rtx);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
107 static void update_total_code_bytes (int);
108 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
109 static int pa_adjust_cost (rtx, rtx, rtx, int);
110 static int pa_adjust_priority (rtx, int);
111 static int pa_issue_rate (void);
112 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
113 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
115 static void pa_encode_section_info (tree, rtx, int);
116 static const char *pa_strip_name_encoding (const char *);
117 static bool pa_function_ok_for_sibcall (tree, tree);
118 static void pa_globalize_label (FILE *, const char *)
120 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
121 HOST_WIDE_INT, tree);
122 #if !defined(USE_COLLECT2)
123 static void pa_asm_out_constructor (rtx, int);
124 static void pa_asm_out_destructor (rtx, int);
126 static void pa_init_builtins (void);
127 static rtx hppa_builtin_saveregs (void);
128 static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
129 static bool pa_scalar_mode_supported_p (enum machine_mode);
130 static bool pa_commutative_p (rtx x, int outer_code);
131 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
132 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
134 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
135 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
136 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
137 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
139 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
140 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
141 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
142 static void output_deferred_plabels (void);
143 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
144 #ifdef ASM_OUTPUT_EXTERNAL_REAL
145 static void pa_hpux_file_end (void);
147 #ifdef HPUX_LONG_DOUBLE_LIBRARY
148 static void pa_hpux_init_libfuncs (void);
150 static rtx pa_struct_value_rtx (tree, int);
151 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
153 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
155 static struct machine_function * pa_init_machine_status (void);
156 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
158 secondary_reload_info *);
161 /* The following extra sections are only used for SOM. */
162 static GTY(()) section *som_readonly_data_section;
163 static GTY(()) section *som_one_only_readonly_data_section;
164 static GTY(()) section *som_one_only_data_section;
166 /* Save the operands last given to a compare for use when we
167 generate a scc or bcc insn. */
168 rtx hppa_compare_op0, hppa_compare_op1;
169 enum cmp_type hppa_branch_type;
171 /* Which cpu we are scheduling for. */
172 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
174 /* The UNIX standard to use for predefines and linking. */
175 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
177 /* Counts for the number of callee-saved general and floating point
178 registers which were saved by the current function's prologue. */
179 static int gr_saved, fr_saved;
181 static rtx find_addr_reg (rtx);
183 /* Keep track of the number of bytes we have output in the CODE subspace
184 during this compilation so we'll know when to emit inline long-calls. */
185 unsigned long total_code_bytes;
187 /* The last address of the previous function plus the number of bytes in
188 associated thunks that have been output. This is used to determine if
189 a thunk can use an IA-relative branch to reach its target function. */
190 static int last_address;
192 /* Variables to handle plabels that we discover are necessary at assembly
193 output time. They are output after the current function. */
194 struct deferred_plabel GTY(())
199 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
200 deferred_plabels;
201 static size_t n_deferred_plabels = 0;
204 /* Initialize the GCC target structure. */
206 #undef TARGET_ASM_ALIGNED_HI_OP
207 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
208 #undef TARGET_ASM_ALIGNED_SI_OP
209 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
210 #undef TARGET_ASM_ALIGNED_DI_OP
211 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
212 #undef TARGET_ASM_UNALIGNED_HI_OP
213 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
214 #undef TARGET_ASM_UNALIGNED_SI_OP
215 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
216 #undef TARGET_ASM_UNALIGNED_DI_OP
217 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
218 #undef TARGET_ASM_INTEGER
219 #define TARGET_ASM_INTEGER pa_assemble_integer
221 #undef TARGET_ASM_FUNCTION_PROLOGUE
222 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
223 #undef TARGET_ASM_FUNCTION_EPILOGUE
224 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
226 #undef TARGET_SCHED_ADJUST_COST
227 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
228 #undef TARGET_SCHED_ADJUST_PRIORITY
229 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
230 #undef TARGET_SCHED_ISSUE_RATE
231 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
233 #undef TARGET_ENCODE_SECTION_INFO
234 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
235 #undef TARGET_STRIP_NAME_ENCODING
236 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
238 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
239 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
241 #undef TARGET_COMMUTATIVE_P
242 #define TARGET_COMMUTATIVE_P pa_commutative_p
244 #undef TARGET_ASM_OUTPUT_MI_THUNK
245 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
246 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
247 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
249 #undef TARGET_ASM_FILE_END
250 #ifdef ASM_OUTPUT_EXTERNAL_REAL
251 #define TARGET_ASM_FILE_END pa_hpux_file_end
253 #define TARGET_ASM_FILE_END output_deferred_plabels
256 #if !defined(USE_COLLECT2)
257 #undef TARGET_ASM_CONSTRUCTOR
258 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
259 #undef TARGET_ASM_DESTRUCTOR
260 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
263 #undef TARGET_DEFAULT_TARGET_FLAGS
264 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
265 #undef TARGET_HANDLE_OPTION
266 #define TARGET_HANDLE_OPTION pa_handle_option
268 #undef TARGET_INIT_BUILTINS
269 #define TARGET_INIT_BUILTINS pa_init_builtins
271 #undef TARGET_RTX_COSTS
272 #define TARGET_RTX_COSTS hppa_rtx_costs
273 #undef TARGET_ADDRESS_COST
274 #define TARGET_ADDRESS_COST hppa_address_cost
276 #undef TARGET_MACHINE_DEPENDENT_REORG
277 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
279 #ifdef HPUX_LONG_DOUBLE_LIBRARY
280 #undef TARGET_INIT_LIBFUNCS
281 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
284 #undef TARGET_PROMOTE_FUNCTION_RETURN
285 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
286 #undef TARGET_PROMOTE_PROTOTYPES
287 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
289 #undef TARGET_STRUCT_VALUE_RTX
290 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
291 #undef TARGET_RETURN_IN_MEMORY
292 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
293 #undef TARGET_MUST_PASS_IN_STACK
294 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
295 #undef TARGET_PASS_BY_REFERENCE
296 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
297 #undef TARGET_CALLEE_COPIES
298 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
299 #undef TARGET_ARG_PARTIAL_BYTES
300 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
302 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
303 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
304 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
305 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
307 #undef TARGET_SCALAR_MODE_SUPPORTED_P
308 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
310 #undef TARGET_CANNOT_FORCE_CONST_MEM
311 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
313 #undef TARGET_SECONDARY_RELOAD
314 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
316 struct gcc_target targetm = TARGET_INITIALIZER;
318 /* Parse the -mfixed-range= option string. */
321 fix_range (const char *const_str)
324 char *str, *dash, *comma;
326 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
327 REG2 are either register names or register numbers. The effect
328 of this option is to mark the registers in the range from REG1 to
329 REG2 as ``fixed'' so they won't be used by the compiler. This is
330 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
332 i = strlen (const_str);
333 str = (char *) alloca (i + 1);
334 memcpy (str, const_str, i + 1);
338 dash = strchr (str, '-');
341 warning (0, "value of -mfixed-range must have form REG1-REG2");
346 comma = strchr (dash + 1, ',');
350 first = decode_reg_name (str);
353 warning (0, "unknown register name: %s", str);
357 last = decode_reg_name (dash + 1);
360 warning (0, "unknown register name: %s", dash + 1);
368 warning (0, "%s-%s is an empty range", str, dash + 1);
372 for (i = first; i <= last; ++i)
373 fixed_regs[i] = call_used_regs[i] = 1;
382 /* Check if all floating point registers have been fixed. */
383 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
388 target_flags |= MASK_DISABLE_FPREGS;
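/* As a rough illustration of the option parsed above (the exact register
   spellings accepted depend on decode_reg_name and this target's register
   names), an invocation along the lines of

       gcc -mfixed-range=fr4-fr31 ...

   marks fr4 through fr31 as fixed so the compiler never allocates them;
   if the range ends up covering every floating point register, the code
   above also sets MASK_DISABLE_FPREGS.  This matches the kernel-mode use
   case mentioned in the comment at the top of fix_range.  */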
391 /* Implement TARGET_HANDLE_OPTION. */
394 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
399 case OPT_mpa_risc_1_0:
401 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
405 case OPT_mpa_risc_1_1:
407 target_flags &= ~MASK_PA_20;
408 target_flags |= MASK_PA_11;
411 case OPT_mpa_risc_2_0:
413 target_flags |= MASK_PA_11 | MASK_PA_20;
417 if (strcmp (arg, "8000") == 0)
418 pa_cpu = PROCESSOR_8000;
419 else if (strcmp (arg, "7100") == 0)
420 pa_cpu = PROCESSOR_7100;
421 else if (strcmp (arg, "700") == 0)
422 pa_cpu = PROCESSOR_700;
423 else if (strcmp (arg, "7100LC") == 0)
424 pa_cpu = PROCESSOR_7100LC;
425 else if (strcmp (arg, "7200") == 0)
426 pa_cpu = PROCESSOR_7200;
427 else if (strcmp (arg, "7300") == 0)
428 pa_cpu = PROCESSOR_7300;
433 case OPT_mfixed_range_:
443 #if TARGET_HPUX_10_10
449 #if TARGET_HPUX_11_11
461 override_options (void)
463 /* Unconditional branches in the delay slot are not compatible with dwarf2
464 call frame information. There is no benefit in using this optimization
465 on PA8000 and later processors. */
466 if (pa_cpu >= PROCESSOR_8000
467 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
468 || flag_unwind_tables)
469 target_flags &= ~MASK_JUMP_IN_DELAY;
471 if (flag_pic && TARGET_PORTABLE_RUNTIME)
473 warning (0, "PIC code generation is not supported in the portable runtime model");
476 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
478 warning (0, "PIC code generation is not compatible with fast indirect calls");
481 if (! TARGET_GAS && write_symbols != NO_DEBUG)
483 warning (0, "-g is only supported when using GAS on this processor,");
484 warning (0, "-g option disabled");
485 write_symbols = NO_DEBUG;
488 /* We only support the "big PIC" model now. And we always generate PIC
489 code when in 64bit mode. */
490 if (flag_pic == 1 || TARGET_64BIT)
491 flag_pic = 2;
493 /* We can't guarantee that .dword is available for 32-bit targets. */
494 if (UNITS_PER_WORD == 4)
495 targetm.asm_out.aligned_op.di = NULL;
497 /* The unaligned ops are only available when using GAS. */
498 if (! TARGET_GAS)
499 {
500 targetm.asm_out.unaligned_op.hi = NULL;
501 targetm.asm_out.unaligned_op.si = NULL;
502 targetm.asm_out.unaligned_op.di = NULL;
505 init_machine_status = pa_init_machine_status;
509 pa_init_builtins (void)
511 #ifdef DONT_HAVE_FPUTC_UNLOCKED
512 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
513 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
514 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
515 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
518 if (built_in_decls [BUILT_IN_FINITE])
519 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
520 if (built_in_decls [BUILT_IN_FINITEF])
521 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
525 /* Function to init struct machine_function.
526 This will be called, via a pointer variable,
527 from push_function_context. */
529 static struct machine_function *
530 pa_init_machine_status (void)
532 return ggc_alloc_cleared (sizeof (machine_function));
535 /* If FROM is a probable pointer register, mark TO as a probable
536 pointer register with the same pointer alignment as FROM. */
539 copy_reg_pointer (rtx to, rtx from)
541 if (REG_POINTER (from))
542 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
545 /* Return 1 if X contains a symbolic expression. We know these
546 expressions will have one of a few well defined forms, so
547 we need only check those forms. */
549 symbolic_expression_p (rtx x)
552 /* Strip off any HIGH. */
553 if (GET_CODE (x) == HIGH)
556 return (symbolic_operand (x, VOIDmode));
559 /* Accept any constant that can be moved in one instruction into a
562 cint_ok_for_move (HOST_WIDE_INT ival)
564 /* OK if ldo, ldil, or zdepi, can be used. */
565 return (VAL_14_BITS_P (ival)
566 || ldil_cint_p (ival)
567 || zdepi_cint_p (ival));
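/* A hedged illustration of the three cases accepted above; the constants
   are chosen so each is handled by exactly one of ldi/ldo (14-bit signed),
   ldil (low 11 bits clear) or zdepi (a shifted, sign-extended 5-bit field).
   Illustrative only, not used anywhere.  */
#if 0
static void
cint_ok_for_move_examples (void)
{
  gcc_assert (cint_ok_for_move (2047));         /* fits in 14 bits: ldi/ldo */
  gcc_assert (cint_ok_for_move (0x12345800));   /* low 11 bits clear: ldil */
  gcc_assert (cint_ok_for_move (0x7c00));       /* five ones at bits 10..14: zdepi */
  gcc_assert (!cint_ok_for_move (0x12345678));  /* needs ldil plus ldo */
}
#endif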
570 /* Return truth value of whether OP can be used as an operand in a
573 adddi3_operand (rtx op, enum machine_mode mode)
575 return (register_operand (op, mode)
576 || (GET_CODE (op) == CONST_INT
577 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
580 /* True iff the operand OP can be used as the destination operand of
581 an integer store. This also implies the operand could be used as
582 the source operand of an integer load. Symbolic, lo_sum and indexed
583 memory operands are not allowed. We accept reloading pseudos and
584 other memory operands. */
586 integer_store_memory_operand (rtx op, enum machine_mode mode)
588 return ((reload_in_progress
590 && REGNO (op) >= FIRST_PSEUDO_REGISTER
591 && reg_renumber [REGNO (op)] < 0)
592 || (GET_CODE (op) == MEM
593 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
594 && !symbolic_memory_operand (op, VOIDmode)
595 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
596 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
599 /* True iff ldil can be used to load this CONST_INT. The least
600 significant 11 bits of the value must be zero and the value must
601 not change sign when extended from 32 to 64 bits. */
603 ldil_cint_p (HOST_WIDE_INT ival)
605 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
607 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
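/* A hedged worked example of the test above.  ldil sets the left 21 bits of
   a word, so the low 11 bits must already be zero, and the 32-bit result
   must not change sign when widened to 64 bits.  Illustrative only.  */
#if 0
static void
ldil_cint_examples (void)
{
  gcc_assert (ldil_cint_p ((HOST_WIDE_INT) 0x12345800));   /* low 11 bits clear */
  gcc_assert (!ldil_cint_p ((HOST_WIDE_INT) 0x12345801));  /* bit 0 set */
  /* With a 64-bit HOST_WIDE_INT this value is positive, but as a 32-bit
     quantity it would be negative, so it is rejected.  */
  gcc_assert (!ldil_cint_p ((HOST_WIDE_INT) 1 << 31));
}
#endif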
610 /* True iff zdepi can be used to generate this CONST_INT.
611 zdepi first sign extends a 5-bit signed number to a given field
612 length, then places this field anywhere in a zero. */
614 zdepi_cint_p (unsigned HOST_WIDE_INT x)
616 unsigned HOST_WIDE_INT lsb_mask, t;
618 /* This might not be obvious, but it's at least fast.
619 This function is critical; we don't have the time loops would take. */
620 lsb_mask = x & -x;
621 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
622 /* Return true iff t is a power of two. */
623 return ((t & (t - 1)) == 0);
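/* A hedged trace of the bit trick above.  With lsb_mask = x & -x, the value
   ((x >> 4) + lsb_mask) & ~(lsb_mask - 1) is zero or a power of two exactly
   when X is a 5-bit signed immediate, sign extended to some field length
   and shifted left, which is what zdepi can deposit.  Illustrative only.  */
#if 0
static void
zdepi_cint_examples (void)
{
  /* 0x70: lsb_mask = 0x10, (0x7 + 0x10) & ~0xf = 0x10, a power of two.  */
  gcc_assert (zdepi_cint_p (0x70));
  /* 0x3e00: lsb_mask = 0x200, (0x3e0 + 0x200) & ~0x1ff = 0x400.  */
  gcc_assert (zdepi_cint_p (0x3e00));
  /* 0x210: lsb_mask = 0x10, (0x21 + 0x10) & ~0xf = 0x30, two bits set.  */
  gcc_assert (!zdepi_cint_p (0x210));
}
#endif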
626 /* True iff depi or extru can be used to compute (reg & mask).
627 Accept bit pattern like these:
628 0....01....1
629 1....10....0
630 1..10..01..1 */
632 and_mask_p (unsigned HOST_WIDE_INT mask)
634 mask = ~mask;
635 mask += mask & -mask;
636 return (mask & (mask - 1)) == 0;
639 /* True iff depi can be used to compute (reg | MASK). */
641 ior_mask_p (unsigned HOST_WIDE_INT mask)
643 mask += mask & -mask;
644 return (mask & (mask - 1)) == 0;
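/* A hedged illustration of the two predicates above.  Adding the lowest set
   bit to a mask collapses a single contiguous run of ones into one carry
   bit, so the power-of-two test accepts exactly the masks whose set bits
   form one field (ior_mask_p); and_mask_p first complements the mask, so it
   accepts masks whose zero bits form one field, which is what a single
   depi or extru can clear.  Illustrative only.  */
#if 0
static void
mask_p_examples (void)
{
  gcc_assert (ior_mask_p (0x0ff0));    /* 0x0ff0 + 0x10 = 0x1000 */
  gcc_assert (!ior_mask_p (0x0f0f));   /* two separate runs of ones */
  gcc_assert (and_mask_p (~(unsigned HOST_WIDE_INT) 0xff0));  /* clears bits 4..11 */
}
#endif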
647 /* Legitimize PIC addresses. If the address is already
648 position-independent, we return ORIG. Newly generated
649 position-independent addresses go to REG. If we need more
650 than one register, we lose. */
653 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
657 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
659 /* Labels need special handling. */
660 if (pic_label_operand (orig, mode))
662 /* We do not want to go through the movXX expanders here since that
663 would create recursion.
665 Nor do we really want to call a generator for a named pattern
666 since that requires multiple patterns if we want to support
669 So instead we just emit the raw set, which avoids the movXX
670 expanders completely. */
671 mark_reg_pointer (reg, BITS_PER_UNIT);
672 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
673 current_function_uses_pic_offset_table = 1;
676 if (GET_CODE (orig) == SYMBOL_REF)
682 /* Before reload, allocate a temporary register for the intermediate
683 result. This allows the sequence to be deleted when the final
684 result is unused and the insns are trivially dead. */
685 tmp_reg = ((reload_in_progress || reload_completed)
686 ? reg : gen_reg_rtx (Pmode));
688 emit_move_insn (tmp_reg,
689 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
690 gen_rtx_HIGH (word_mode, orig)));
692 = gen_const_mem (Pmode,
693 gen_rtx_LO_SUM (Pmode, tmp_reg,
694 gen_rtx_UNSPEC (Pmode,
698 current_function_uses_pic_offset_table = 1;
699 mark_reg_pointer (reg, BITS_PER_UNIT);
700 insn = emit_move_insn (reg, pic_ref);
702 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
703 set_unique_reg_note (insn, REG_EQUAL, orig);
707 else if (GET_CODE (orig) == CONST)
711 if (GET_CODE (XEXP (orig, 0)) == PLUS
712 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
716 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
718 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
719 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
720 base == reg ? 0 : reg);
722 if (GET_CODE (orig) == CONST_INT)
724 if (INT_14_BITS (orig))
725 return plus_constant (base, INTVAL (orig));
726 orig = force_reg (Pmode, orig);
728 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
729 /* Likewise, should we set special REG_NOTEs here? */
735 static GTY(()) rtx gen_tls_tga;
738 gen_tls_get_addr (void)
741 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
746 hppa_tls_call (rtx arg)
750 ret = gen_reg_rtx (Pmode);
751 emit_library_call_value (gen_tls_get_addr (), ret,
752 LCT_CONST, Pmode, 1, arg, Pmode);
758 legitimize_tls_address (rtx addr)
760 rtx ret, insn, tmp, t1, t2, tp;
761 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
765 case TLS_MODEL_GLOBAL_DYNAMIC:
766 tmp = gen_reg_rtx (Pmode);
768 emit_insn (gen_tgd_load_pic (tmp, addr));
770 emit_insn (gen_tgd_load (tmp, addr));
771 ret = hppa_tls_call (tmp);
774 case TLS_MODEL_LOCAL_DYNAMIC:
775 ret = gen_reg_rtx (Pmode);
776 tmp = gen_reg_rtx (Pmode);
779 emit_insn (gen_tld_load_pic (tmp, addr));
781 emit_insn (gen_tld_load (tmp, addr));
782 t1 = hppa_tls_call (tmp);
785 t2 = gen_reg_rtx (Pmode);
786 emit_libcall_block (insn, t2, t1,
787 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
789 emit_insn (gen_tld_offset_load (ret, addr, t2));
792 case TLS_MODEL_INITIAL_EXEC:
793 tp = gen_reg_rtx (Pmode);
794 tmp = gen_reg_rtx (Pmode);
795 ret = gen_reg_rtx (Pmode);
796 emit_insn (gen_tp_load (tp));
798 emit_insn (gen_tie_load_pic (tmp, addr));
800 emit_insn (gen_tie_load (tmp, addr));
801 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
804 case TLS_MODEL_LOCAL_EXEC:
805 tp = gen_reg_rtx (Pmode);
806 ret = gen_reg_rtx (Pmode);
807 emit_insn (gen_tp_load (tp));
808 emit_insn (gen_tle_load (ret, addr, tp));
818 /* Try machine-dependent ways of modifying an illegitimate address
819 to be legitimate. If we find one, return the new, valid address.
820 This macro is used in only one place: `memory_address' in explow.c.
822 OLDX is the address as it was before break_out_memory_refs was called.
823 In some cases it is useful to look at this to decide what needs to be done.
825 MODE and WIN are passed so that this macro can use
826 GO_IF_LEGITIMATE_ADDRESS.
828 It is always safe for this macro to do nothing. It exists to recognize
829 opportunities to optimize the output.
831 For the PA, transform:
833 memory(X + <large int>)
835 into:
837 if (<large int> & mask) >= 16
838 Y = (<large int> & ~mask) + mask + 1 Round up.
839 else
840 Y = (<large int> & ~mask) Round down.
841 Z = X + Y
842 memory (Z + (<large int> - Y));
844 This is for CSE to find several similar references, and only use one Z.
846 X can either be a SYMBOL_REF or REG, but because combine cannot
847 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
848 D will not fit in 14 bits.
850 MODE_FLOAT references allow displacements which fit in 5 bits, so use
851 0x1f as the mask.
853 MODE_INT references allow displacements which fit in 14 bits, so use
854 0x3fff as the mask.
856 This relies on the fact that most mode MODE_FLOAT references will use FP
857 registers and most mode MODE_INT references will use integer registers.
858 (In the rare case of an FP register used in an integer MODE, we depend
859 on secondary reloads to clean things up.)
862 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
863 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
864 addressing modes to be used).
866 Put X and Z into registers. Then put the entire expression into
867 a register. */
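/* A hedged worked example of the rounding above.  For a word-mode reference
   the mask is 0x3fff, so for memory (X + 0x11234) the low part 0x1234 is
   below halfway (0x2000): Y rounds down to 0x10000, Z = X + Y can be shared
   by CSE, and the reference becomes memory (Z + 0x1234), whose displacement
   fits in 14 bits.  For a pre-2.0 float-mode reference the mask is 0x1f, so
   the same offset rounds up to Y = 0x11240 and the reference becomes
   memory (Z + (-0xc)), whose displacement fits in 5 bits.  */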
870 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
871 enum machine_mode mode)
875 /* We need to canonicalize the order of operands in unscaled indexed
876 addresses since the code that checks if an address is valid doesn't
877 always try both orders. */
878 if (!TARGET_NO_SPACE_REGS
879 && GET_CODE (x) == PLUS
880 && GET_MODE (x) == Pmode
881 && REG_P (XEXP (x, 0))
882 && REG_P (XEXP (x, 1))
883 && REG_POINTER (XEXP (x, 0))
884 && !REG_POINTER (XEXP (x, 1)))
885 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
887 if (PA_SYMBOL_REF_TLS_P (x))
888 return legitimize_tls_address (x);
890 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
892 /* Strip off CONST. */
893 if (GET_CODE (x) == CONST)
896 /* Special case. Get the SYMBOL_REF into a register and use indexing.
897 That should always be safe. */
898 if (GET_CODE (x) == PLUS
899 && GET_CODE (XEXP (x, 0)) == REG
900 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
902 rtx reg = force_reg (Pmode, XEXP (x, 1));
903 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
906 /* Note we must reject symbols which represent function addresses
907 since the assembler/linker can't handle arithmetic on plabels. */
908 if (GET_CODE (x) == PLUS
909 && GET_CODE (XEXP (x, 1)) == CONST_INT
910 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
911 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
912 || GET_CODE (XEXP (x, 0)) == REG))
914 rtx int_part, ptr_reg;
916 int offset = INTVAL (XEXP (x, 1));
919 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
920 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
922 /* Choose which way to round the offset. Round up if we
923 are >= halfway to the next boundary. */
924 if ((offset & mask) >= ((mask + 1) / 2))
925 newoffset = (offset & ~ mask) + mask + 1;
927 newoffset = (offset & ~ mask);
929 /* If the newoffset will not fit in 14 bits (ldo), then
930 handling this would take 4 or 5 instructions (2 to load
931 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
932 add the new offset and the SYMBOL_REF.) Combine can
933 not handle 4->2 or 5->2 combinations, so do not create
935 if (! VAL_14_BITS_P (newoffset)
936 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
938 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
941 gen_rtx_HIGH (Pmode, const_part));
944 gen_rtx_LO_SUM (Pmode,
945 tmp_reg, const_part));
949 if (! VAL_14_BITS_P (newoffset))
950 int_part = force_reg (Pmode, GEN_INT (newoffset));
952 int_part = GEN_INT (newoffset);
954 ptr_reg = force_reg (Pmode,
956 force_reg (Pmode, XEXP (x, 0)),
959 return plus_constant (ptr_reg, offset - newoffset);
962 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
964 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
965 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
966 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
967 && (OBJECT_P (XEXP (x, 1))
968 || GET_CODE (XEXP (x, 1)) == SUBREG)
969 && GET_CODE (XEXP (x, 1)) != CONST)
971 int val = INTVAL (XEXP (XEXP (x, 0), 1));
975 if (GET_CODE (reg1) != REG)
976 reg1 = force_reg (Pmode, force_operand (reg1, 0));
978 reg2 = XEXP (XEXP (x, 0), 0);
979 if (GET_CODE (reg2) != REG)
980 reg2 = force_reg (Pmode, force_operand (reg2, 0));
982 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
989 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
991 Only do so for floating point modes since this is more speculative
992 and we lose if it's an integer store. */
993 if (GET_CODE (x) == PLUS
994 && GET_CODE (XEXP (x, 0)) == PLUS
995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
996 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
997 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
998 && (mode == SFmode || mode == DFmode))
1001 /* First, try and figure out what to use as a base register. */
1002 rtx reg1, reg2, base, idx, orig_base;
1004 reg1 = XEXP (XEXP (x, 0), 1);
1009 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1010 then emit_move_sequence will turn on REG_POINTER so we'll know
1011 it's a base register below. */
1012 if (GET_CODE (reg1) != REG)
1013 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1015 if (GET_CODE (reg2) != REG)
1016 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1018 /* Figure out what the base and index are. */
1020 if (GET_CODE (reg1) == REG
1021 && REG_POINTER (reg1))
1024 orig_base = XEXP (XEXP (x, 0), 1);
1025 idx = gen_rtx_PLUS (Pmode,
1026 gen_rtx_MULT (Pmode,
1027 XEXP (XEXP (XEXP (x, 0), 0), 0),
1028 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1031 else if (GET_CODE (reg2) == REG
1032 && REG_POINTER (reg2))
1035 orig_base = XEXP (x, 1);
1042 /* If the index adds a large constant, try to scale the
1043 constant so that it can be loaded with only one insn. */
1044 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1045 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1046 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1047 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1049 /* Divide the CONST_INT by the scale factor, then add it to A. */
1050 int val = INTVAL (XEXP (idx, 1));
1052 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1053 reg1 = XEXP (XEXP (idx, 0), 0);
1054 if (GET_CODE (reg1) != REG)
1055 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1057 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1059 /* We can now generate a simple scaled indexed address. */
1062 (Pmode, gen_rtx_PLUS (Pmode,
1063 gen_rtx_MULT (Pmode, reg1,
1064 XEXP (XEXP (idx, 0), 1)),
1068 /* If B + C is still a valid base register, then add them. */
1069 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1070 && INTVAL (XEXP (idx, 1)) <= 4096
1071 && INTVAL (XEXP (idx, 1)) >= -4096)
1073 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1076 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1078 reg2 = XEXP (XEXP (idx, 0), 0);
1079 if (GET_CODE (reg2) != CONST_INT)
1080 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1082 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1083 gen_rtx_MULT (Pmode,
1089 /* Get the index into a register, then add the base + index and
1090 return a register holding the result. */
1092 /* First get A into a register. */
1093 reg1 = XEXP (XEXP (idx, 0), 0);
1094 if (GET_CODE (reg1) != REG)
1095 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1097 /* And get B into a register. */
1098 reg2 = XEXP (idx, 1);
1099 if (GET_CODE (reg2) != REG)
1100 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1102 reg1 = force_reg (Pmode,
1103 gen_rtx_PLUS (Pmode,
1104 gen_rtx_MULT (Pmode, reg1,
1105 XEXP (XEXP (idx, 0), 1)),
1108 /* Add the result to our base register and return. */
1109 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1113 /* Uh-oh. We might have an address for x[n-100000]. This needs
1114 special handling to avoid creating an indexed memory address
1115 with x-100000 as the base.
1117 If the constant part is small enough, then it's still safe because
1118 there is a guard page at the beginning and end of the data segment.
1120 Scaled references are common enough that we want to try and rearrange the
1121 terms so that we can use indexing for these addresses too. Only
1122 do the optimization for floating point modes. */
1124 if (GET_CODE (x) == PLUS
1125 && symbolic_expression_p (XEXP (x, 1)))
1127 /* Ugly. We modify things here so that the address offset specified
1128 by the index expression is computed first, then added to x to form
1129 the entire address. */
1131 rtx regx1, regx2, regy1, regy2, y;
1133 /* Strip off any CONST. */
1135 if (GET_CODE (y) == CONST)
1138 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1140 /* See if this looks like
1141 (plus (mult (reg) (shadd_const))
1142 (const (plus (symbol_ref) (const_int))))
1144 Where const_int is small. In that case the const
1145 expression is a valid pointer for indexing.
1147 If const_int is big, but can be divided evenly by shadd_const
1148 and added to (reg), this also allows more scaled indexed addresses. */
1149 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1150 && GET_CODE (XEXP (x, 0)) == MULT
1151 && GET_CODE (XEXP (y, 1)) == CONST_INT
1152 && INTVAL (XEXP (y, 1)) >= -4096
1153 && INTVAL (XEXP (y, 1)) <= 4095
1154 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1155 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1157 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1161 if (GET_CODE (reg1) != REG)
1162 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1164 reg2 = XEXP (XEXP (x, 0), 0);
1165 if (GET_CODE (reg2) != REG)
1166 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1168 return force_reg (Pmode,
1169 gen_rtx_PLUS (Pmode,
1170 gen_rtx_MULT (Pmode,
1175 else if ((mode == DFmode || mode == SFmode)
1176 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1177 && GET_CODE (XEXP (x, 0)) == MULT
1178 && GET_CODE (XEXP (y, 1)) == CONST_INT
1179 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1180 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1181 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1184 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1185 / INTVAL (XEXP (XEXP (x, 0), 1))));
1186 regx2 = XEXP (XEXP (x, 0), 0);
1187 if (GET_CODE (regx2) != REG)
1188 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1189 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1193 gen_rtx_PLUS (Pmode,
1194 gen_rtx_MULT (Pmode, regx2,
1195 XEXP (XEXP (x, 0), 1)),
1196 force_reg (Pmode, XEXP (y, 0))));
1198 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1199 && INTVAL (XEXP (y, 1)) >= -4096
1200 && INTVAL (XEXP (y, 1)) <= 4095)
1202 /* This is safe because of the guard page at the
1203 beginning and end of the data space. Just
1204 return the original address. */
1209 /* Doesn't look like one we can optimize. */
1210 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1211 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1212 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1213 regx1 = force_reg (Pmode,
1214 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1216 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1224 /* For the HPPA, REG and REG+CONST are cost 0
1225 and addresses involving symbolic constants are cost 2.
1227 PIC addresses are very expensive.
1229 It is no coincidence that this has the same structure
1230 as GO_IF_LEGITIMATE_ADDRESS. */
1233 hppa_address_cost (rtx X)
1235 switch (GET_CODE (X))
1248 /* Compute a (partial) cost for rtx X. Return true if the complete
1249 cost has been computed, and false if subexpressions should be
1250 scanned. In either case, *TOTAL contains the cost result. */
1253 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1258 if (INTVAL (x) == 0)
1260 else if (INT_14_BITS (x))
1277 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1278 && outer_code != SET)
1285 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1286 *total = COSTS_N_INSNS (3);
1287 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1288 *total = COSTS_N_INSNS (8);
1290 *total = COSTS_N_INSNS (20);
1294 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1296 *total = COSTS_N_INSNS (14);
1304 *total = COSTS_N_INSNS (60);
1307 case PLUS: /* this includes shNadd insns */
1309 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1310 *total = COSTS_N_INSNS (3);
1312 *total = COSTS_N_INSNS (1);
1318 *total = COSTS_N_INSNS (1);
1326 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1327 new rtx with the correct mode. */
1329 force_mode (enum machine_mode mode, rtx orig)
1331 if (mode == GET_MODE (orig))
1334 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1336 return gen_rtx_REG (mode, REGNO (orig));
1339 /* Return 1 if *X is a thread-local symbol. */
1342 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1344 return PA_SYMBOL_REF_TLS_P (*x);
1347 /* Return 1 if X contains a thread-local symbol. */
1350 pa_tls_referenced_p (rtx x)
1352 if (!TARGET_HAVE_TLS)
1355 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1358 /* Emit insns to move operands[1] into operands[0].
1360 Return 1 if we have written out everything that needs to be done to
1361 do the move. Otherwise, return 0 and the caller will emit the move
1364 Note SCRATCH_REG may not be in the proper mode depending on how it
1365 will be used. This routine is responsible for creating a new copy
1366 of SCRATCH_REG in the proper mode. */
1369 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1371 register rtx operand0 = operands[0];
1372 register rtx operand1 = operands[1];
1375 /* We can only handle indexed addresses in the destination operand
1376 of floating point stores. Thus, we need to break out indexed
1377 addresses from the destination operand. */
1378 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1380 /* This is only safe up to the beginning of life analysis. */
1381 gcc_assert (!no_new_pseudos);
1383 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1384 operand0 = replace_equiv_address (operand0, tem);
1387 /* On targets with non-equivalent space registers, break out unscaled
1388 indexed addresses from the source operand before the final CSE.
1389 We have to do this because the REG_POINTER flag is not correctly
1390 carried through various optimization passes and CSE may substitute
1391 a pseudo without the pointer set for one with the pointer set. As
1392 a result, we lose various opportunities to create insns with
1393 unscaled indexed addresses. */
1394 if (!TARGET_NO_SPACE_REGS
1395 && !cse_not_expected
1396 && GET_CODE (operand1) == MEM
1397 && GET_CODE (XEXP (operand1, 0)) == PLUS
1398 && REG_P (XEXP (XEXP (operand1, 0), 0))
1399 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1401 = replace_equiv_address (operand1,
1402 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1405 && reload_in_progress && GET_CODE (operand0) == REG
1406 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1407 operand0 = reg_equiv_mem[REGNO (operand0)];
1408 else if (scratch_reg
1409 && reload_in_progress && GET_CODE (operand0) == SUBREG
1410 && GET_CODE (SUBREG_REG (operand0)) == REG
1411 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1413 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1414 the code which tracks sets/uses for delete_output_reload. */
1415 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1416 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1417 SUBREG_BYTE (operand0));
1418 operand0 = alter_subreg (&temp);
1422 && reload_in_progress && GET_CODE (operand1) == REG
1423 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1424 operand1 = reg_equiv_mem[REGNO (operand1)];
1425 else if (scratch_reg
1426 && reload_in_progress && GET_CODE (operand1) == SUBREG
1427 && GET_CODE (SUBREG_REG (operand1)) == REG
1428 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1430 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1431 the code which tracks sets/uses for delete_output_reload. */
1432 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1433 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1434 SUBREG_BYTE (operand1));
1435 operand1 = alter_subreg (&temp);
1438 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1439 && ((tem = find_replacement (&XEXP (operand0, 0)))
1440 != XEXP (operand0, 0)))
1441 operand0 = replace_equiv_address (operand0, tem);
1443 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1444 && ((tem = find_replacement (&XEXP (operand1, 0)))
1445 != XEXP (operand1, 0)))
1446 operand1 = replace_equiv_address (operand1, tem);
1448 /* Handle secondary reloads for loads/stores of FP registers from
1449 REG+D addresses where D does not fit in 5 or 14 bits, including
1450 (subreg (mem (addr))) cases. */
1452 && fp_reg_operand (operand0, mode)
1453 && ((GET_CODE (operand1) == MEM
1454 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1455 XEXP (operand1, 0)))
1456 || ((GET_CODE (operand1) == SUBREG
1457 && GET_CODE (XEXP (operand1, 0)) == MEM
1458 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1460 XEXP (XEXP (operand1, 0), 0))))))
1462 if (GET_CODE (operand1) == SUBREG)
1463 operand1 = XEXP (operand1, 0);
1465 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1466 it in WORD_MODE regardless of what mode it was originally given
1468 scratch_reg = force_mode (word_mode, scratch_reg);
1470 /* D might not fit in 14 bits either; for such cases load D into
1472 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1474 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1475 emit_move_insn (scratch_reg,
1476 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1478 XEXP (XEXP (operand1, 0), 0),
1482 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1483 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1484 replace_equiv_address (operand1, scratch_reg)));
1487 else if (scratch_reg
1488 && fp_reg_operand (operand1, mode)
1489 && ((GET_CODE (operand0) == MEM
1490 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1492 XEXP (operand0, 0)))
1493 || ((GET_CODE (operand0) == SUBREG)
1494 && GET_CODE (XEXP (operand0, 0)) == MEM
1495 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1497 XEXP (XEXP (operand0, 0), 0)))))
1499 if (GET_CODE (operand0) == SUBREG)
1500 operand0 = XEXP (operand0, 0);
1502 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1503 it in WORD_MODE regardless of what mode it was originally given
1505 scratch_reg = force_mode (word_mode, scratch_reg);
1507 /* D might not fit in 14 bits either; for such cases load D into
1509 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1511 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1512 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1515 XEXP (XEXP (operand0, 0),
1520 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1521 emit_insn (gen_rtx_SET (VOIDmode,
1522 replace_equiv_address (operand0, scratch_reg),
1526 /* Handle secondary reloads for loads of FP registers from constant
1527 expressions by forcing the constant into memory.
1529 Use scratch_reg to hold the address of the memory location.
1531 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1532 NO_REGS when presented with a const_int and a register class
1533 containing only FP registers. Doing so unfortunately creates
1534 more problems than it solves. Fix this for 2.5. */
1535 else if (scratch_reg
1536 && CONSTANT_P (operand1)
1537 && fp_reg_operand (operand0, mode))
1539 rtx const_mem, xoperands[2];
1541 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1542 it in WORD_MODE regardless of what mode it was originally given
1544 scratch_reg = force_mode (word_mode, scratch_reg);
1546 /* Force the constant into memory and put the address of the
1547 memory location into scratch_reg. */
1548 const_mem = force_const_mem (mode, operand1);
1549 xoperands[0] = scratch_reg;
1550 xoperands[1] = XEXP (const_mem, 0);
1551 emit_move_sequence (xoperands, Pmode, 0);
1553 /* Now load the destination register. */
1554 emit_insn (gen_rtx_SET (mode, operand0,
1555 replace_equiv_address (const_mem, scratch_reg)));
1558 /* Handle secondary reloads for SAR. These occur when trying to load
1559 the SAR from memory, FP register, or with a constant. */
1560 else if (scratch_reg
1561 && GET_CODE (operand0) == REG
1562 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1563 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1564 && (GET_CODE (operand1) == MEM
1565 || GET_CODE (operand1) == CONST_INT
1566 || (GET_CODE (operand1) == REG
1567 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1569 /* D might not fit in 14 bits either; for such cases load D into
1571 if (GET_CODE (operand1) == MEM
1572 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1574 /* We are reloading the address into the scratch register, so we
1575 want to make sure the scratch register is a full register. */
1576 scratch_reg = force_mode (word_mode, scratch_reg);
1578 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1579 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1582 XEXP (XEXP (operand1, 0),
1586 /* Now we are going to load the scratch register from memory,
1587 we want to load it in the same width as the original MEM,
1588 which must be the same as the width of the ultimate destination,
1589 OPERAND0. */
1590 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1592 emit_move_insn (scratch_reg,
1593 replace_equiv_address (operand1, scratch_reg));
1597 /* We want to load the scratch register using the same mode as
1598 the ultimate destination. */
1599 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1601 emit_move_insn (scratch_reg, operand1);
1604 /* And emit the insn to set the ultimate destination. We know that
1605 the scratch register has the same mode as the destination at this
1607 emit_move_insn (operand0, scratch_reg);
1610 /* Handle the most common case: storing into a register. */
1611 else if (register_operand (operand0, mode))
1613 if (register_operand (operand1, mode)
1614 || (GET_CODE (operand1) == CONST_INT
1615 && cint_ok_for_move (INTVAL (operand1)))
1616 || (operand1 == CONST0_RTX (mode))
1617 || (GET_CODE (operand1) == HIGH
1618 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1619 /* Only `general_operands' can come here, so MEM is ok. */
1620 || GET_CODE (operand1) == MEM)
1622 /* Various sets are created during RTL generation which don't
1623 have the REG_POINTER flag correctly set. After the CSE pass,
1624 instruction recognition can fail if we don't consistently
1625 set this flag when performing register copies. This should
1626 also improve the opportunities for creating insns that use
1627 unscaled indexing. */
1628 if (REG_P (operand0) && REG_P (operand1))
1630 if (REG_POINTER (operand1)
1631 && !REG_POINTER (operand0)
1632 && !HARD_REGISTER_P (operand0))
1633 copy_reg_pointer (operand0, operand1);
1634 else if (REG_POINTER (operand0)
1635 && !REG_POINTER (operand1)
1636 && !HARD_REGISTER_P (operand1))
1637 copy_reg_pointer (operand1, operand0);
1640 /* When MEMs are broken out, the REG_POINTER flag doesn't
1641 get set. In some cases, we can set the REG_POINTER flag
1642 from the declaration for the MEM. */
1643 if (REG_P (operand0)
1644 && GET_CODE (operand1) == MEM
1645 && !REG_POINTER (operand0))
1647 tree decl = MEM_EXPR (operand1);
1649 /* Set the register pointer flag and register alignment
1650 if the declaration for this memory reference is a
1651 pointer type. Fortran indirect argument references
1652 are ignored. */
1653 if (decl
1654 && !(flag_argument_noalias > 1
1655 && TREE_CODE (decl) == INDIRECT_REF
1656 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1660 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1662 if (TREE_CODE (decl) == COMPONENT_REF)
1663 decl = TREE_OPERAND (decl, 1);
1665 type = TREE_TYPE (decl);
1666 if (TREE_CODE (type) == ARRAY_TYPE)
1667 type = get_inner_array_type (type);
1669 if (POINTER_TYPE_P (type))
1673 type = TREE_TYPE (type);
1674 /* Using TYPE_ALIGN_OK is rather conservative as
1675 only the ada frontend actually sets it. */
1676 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1678 mark_reg_pointer (operand0, align);
1683 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1687 else if (GET_CODE (operand0) == MEM)
1689 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1690 && !(reload_in_progress || reload_completed))
1692 rtx temp = gen_reg_rtx (DFmode);
1694 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1695 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1698 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1700 /* Run this case quickly. */
1701 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1704 if (! (reload_in_progress || reload_completed))
1706 operands[0] = validize_mem (operand0);
1707 operands[1] = operand1 = force_reg (mode, operand1);
1711 /* Simplify the source if we need to.
1712 Note we do have to handle function labels here, even though we do
1713 not consider them legitimate constants. Loop optimizations can
1714 call the emit_move_xxx with one as a source. */
1715 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1716 || function_label_operand (operand1, mode)
1717 || (GET_CODE (operand1) == HIGH
1718 && symbolic_operand (XEXP (operand1, 0), mode)))
1722 if (GET_CODE (operand1) == HIGH)
1725 operand1 = XEXP (operand1, 0);
1727 if (symbolic_operand (operand1, mode))
1729 /* Argh. The assembler and linker can't handle arithmetic
1730 involving plabels.
1732 So we force the plabel into memory, load operand0 from
1733 the memory location, then add in the constant part. */
1734 if ((GET_CODE (operand1) == CONST
1735 && GET_CODE (XEXP (operand1, 0)) == PLUS
1736 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1737 || function_label_operand (operand1, mode))
1739 rtx temp, const_part;
1741 /* Figure out what (if any) scratch register to use. */
1742 if (reload_in_progress || reload_completed)
1744 scratch_reg = scratch_reg ? scratch_reg : operand0;
1745 /* SCRATCH_REG will hold an address and maybe the actual
1746 data. We want it in WORD_MODE regardless of what mode it
1747 was originally given to us. */
1748 scratch_reg = force_mode (word_mode, scratch_reg);
1751 scratch_reg = gen_reg_rtx (Pmode);
1753 if (GET_CODE (operand1) == CONST)
1755 /* Save away the constant part of the expression. */
1756 const_part = XEXP (XEXP (operand1, 0), 1);
1757 gcc_assert (GET_CODE (const_part) == CONST_INT);
1759 /* Force the function label into memory. */
1760 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1764 /* No constant part. */
1765 const_part = NULL_RTX;
1767 /* Force the function label into memory. */
1768 temp = force_const_mem (mode, operand1);
1772 /* Get the address of the memory location. PIC-ify it if
1774 temp = XEXP (temp, 0);
1776 temp = legitimize_pic_address (temp, mode, scratch_reg);
1778 /* Put the address of the memory location into our destination
1781 emit_move_sequence (operands, mode, scratch_reg);
1783 /* Now load from the memory location into our destination
1785 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1786 emit_move_sequence (operands, mode, scratch_reg);
1788 /* And add back in the constant part. */
1789 if (const_part != NULL_RTX)
1790 expand_inc (operand0, const_part);
1799 if (reload_in_progress || reload_completed)
1801 temp = scratch_reg ? scratch_reg : operand0;
1802 /* TEMP will hold an address and maybe the actual
1803 data. We want it in WORD_MODE regardless of what mode it
1804 was originally given to us. */
1805 temp = force_mode (word_mode, temp);
1808 temp = gen_reg_rtx (Pmode);
1810 /* (const (plus (symbol) (const_int))) must be forced to
1811 memory during/after reload if the const_int will not fit
1813 if (GET_CODE (operand1) == CONST
1814 && GET_CODE (XEXP (operand1, 0)) == PLUS
1815 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1816 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1817 && (reload_completed || reload_in_progress)
1820 rtx const_mem = force_const_mem (mode, operand1);
1821 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1823 operands[1] = replace_equiv_address (const_mem, operands[1]);
1824 emit_move_sequence (operands, mode, temp);
1828 operands[1] = legitimize_pic_address (operand1, mode, temp);
1829 if (REG_P (operand0) && REG_P (operands[1]))
1830 copy_reg_pointer (operand0, operands[1]);
1831 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1834 /* On the HPPA, references to data space are supposed to use dp,
1835 register 27, but showing it in the RTL inhibits various cse
1836 and loop optimizations. */
1841 if (reload_in_progress || reload_completed)
1843 temp = scratch_reg ? scratch_reg : operand0;
1844 /* TEMP will hold an address and maybe the actual
1845 data. We want it in WORD_MODE regardless of what mode it
1846 was originally given to us. */
1847 temp = force_mode (word_mode, temp);
1850 temp = gen_reg_rtx (mode);
1852 /* Loading a SYMBOL_REF into a register makes that register
1853 safe to be used as the base in an indexed address.
1855 Don't mark hard registers though. That loses. */
1856 if (GET_CODE (operand0) == REG
1857 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1858 mark_reg_pointer (operand0, BITS_PER_UNIT);
1859 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1860 mark_reg_pointer (temp, BITS_PER_UNIT);
1863 set = gen_rtx_SET (mode, operand0, temp);
1865 set = gen_rtx_SET (VOIDmode,
1867 gen_rtx_LO_SUM (mode, temp, operand1));
1869 emit_insn (gen_rtx_SET (VOIDmode,
1871 gen_rtx_HIGH (mode, operand1)));
1877 else if (pa_tls_referenced_p (operand1))
1882 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1884 addend = XEXP (XEXP (tmp, 0), 1);
1885 tmp = XEXP (XEXP (tmp, 0), 0);
1888 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1889 tmp = legitimize_tls_address (tmp);
1892 tmp = gen_rtx_PLUS (mode, tmp, addend);
1893 tmp = force_operand (tmp, operands[0]);
1897 else if (GET_CODE (operand1) != CONST_INT
1898 || !cint_ok_for_move (INTVAL (operand1)))
1902 HOST_WIDE_INT value = 0;
1903 HOST_WIDE_INT insv = 0;
1906 if (GET_CODE (operand1) == CONST_INT)
1907 value = INTVAL (operand1);
1910 && GET_CODE (operand1) == CONST_INT
1911 && HOST_BITS_PER_WIDE_INT > 32
1912 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1916 /* Extract the low order 32 bits of the value and sign extend.
1917 If the new value is the same as the original value, we can
1918 use the original value as-is. If the new value is
1919 different, we use it and insert the most-significant 32-bits
1920 of the original value into the final result. */
1921 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1922 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1925 #if HOST_BITS_PER_WIDE_INT > 32
1926 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1930 operand1 = GEN_INT (nval);
1934 if (reload_in_progress || reload_completed)
1935 temp = scratch_reg ? scratch_reg : operand0;
1937 temp = gen_reg_rtx (mode);
1939 /* We don't directly split DImode constants on 32-bit targets
1940 because PLUS uses an 11-bit immediate and the insn sequence
1941 generated is not as efficient as the one using HIGH/LO_SUM. */
1942 if (GET_CODE (operand1) == CONST_INT
1943 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1944 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1947 /* Directly break constant into high and low parts. This
1948 provides better optimization opportunities because various
1949 passes recognize constants split with PLUS but not LO_SUM.
1950 We use a 14-bit signed low part except when the addition
1951 of 0x4000 to the high part might change the sign of the
1953 HOST_WIDE_INT low = value & 0x3fff;
1954 HOST_WIDE_INT high = value & ~ 0x3fff;
1958 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1966 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1967 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1971 emit_insn (gen_rtx_SET (VOIDmode, temp,
1972 gen_rtx_HIGH (mode, operand1)));
1973 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1976 insn = emit_move_insn (operands[0], operands[1]);
1978 /* Now insert the most significant 32 bits of the value
1979 into the register. When we don't have a second register
1980 available, it could take up to nine instructions to load
1981 a 64-bit integer constant. Prior to reload, we force
1982 constants that would take more than three instructions
1983 to load to the constant pool. During and after reload,
1984 we have to handle all possible values. */
1987 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1988 register and the value to be inserted is outside the
1989 range that can be loaded with three depdi instructions. */
1990 if (temp != operand0 && (insv >= 16384 || insv < -16384))
1992 operand1 = GEN_INT (insv);
1994 emit_insn (gen_rtx_SET (VOIDmode, temp,
1995 gen_rtx_HIGH (mode, operand1)));
1996 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1997 emit_insn (gen_insv (operand0, GEN_INT (32),
2002 int len = 5, pos = 27;
2004 /* Insert the bits using the depdi instruction. */
2007 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2008 HOST_WIDE_INT sign = v5 < 0;
2010 /* Left extend the insertion. */
2011 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2012 while (pos > 0 && (insv & 1) == sign)
2014 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2019 emit_insn (gen_insv (operand0, GEN_INT (len),
2020 GEN_INT (pos), GEN_INT (v5)));
2022 len = pos > 0 && pos < 5 ? pos : 5;
2028 set_unique_reg_note (insn, REG_EQUAL, op1);
2033 /* Now have insn-emit do whatever it normally does. */
2037 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2038 it will need a link/runtime reloc). */
2041 reloc_needed (tree exp)
2045 switch (TREE_CODE (exp))
2050 case POINTER_PLUS_EXPR:
2053 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2054 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2059 case NON_LVALUE_EXPR:
2060 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2066 unsigned HOST_WIDE_INT ix;
2068 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2070 reloc |= reloc_needed (value);
2083 /* Does operand (which is a symbolic_operand) live in text space?
2084 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2088 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2090 if (GET_CODE (operand) == CONST)
2091 operand = XEXP (XEXP (operand, 0), 0);
2094 if (GET_CODE (operand) == SYMBOL_REF)
2095 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2099 if (GET_CODE (operand) == SYMBOL_REF)
2100 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2106 /* Return the best assembler insn template
2107 for moving operands[1] into operands[0] as a fullword. */
2109 singlemove_string (rtx *operands)
2111 HOST_WIDE_INT intval;
2113 if (GET_CODE (operands[0]) == MEM)
2114 return "stw %r1,%0";
2115 if (GET_CODE (operands[1]) == MEM)
2117 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2122 gcc_assert (GET_MODE (operands[1]) == SFmode);
2124 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2126 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2127 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2129 operands[1] = GEN_INT (i);
2130 /* Fall through to CONST_INT case. */
2132 if (GET_CODE (operands[1]) == CONST_INT)
2134 intval = INTVAL (operands[1]);
2136 if (VAL_14_BITS_P (intval))
2138 else if ((intval & 0x7ff) == 0)
2139 return "ldil L'%1,%0";
2140 else if (zdepi_cint_p (intval))
2141 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2143 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2145 return "copy %1,%0";
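/* Informal examples: a 14-bit value such as 37 becomes a single ldi;
   0x12340000 has its low 11 bits clear and needs only the ldil;
   0x3c00 (15 << 10) is a small field shifted left and is handled by
   the zero-extending deposit; something like 0x1234abcd falls through
   to the two-insn ldil/ldo pair. */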
2149 /* Compute position (in OP[1]) and width (in OP[2])
2150 useful for copying IMM to a register using the zdepi
2151 instructions. Store the immediate value to insert in OP[0]. */
2153 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2157 /* Find the least significant set bit in IMM. */
2158 for (lsb = 0; lsb < 32; lsb++)
2165 /* Choose variants based on *sign* of the 5-bit field. */
2166 if ((imm & 0x10) == 0)
2167 len = (lsb <= 28) ? 4 : 32 - lsb;
2170 /* Find the width of the bitstring in IMM. */
2171 for (len = 5; len < 32; len++)
2173 if ((imm & (1 << len)) == 0)
2177 /* Sign extend IMM as a 5-bit value. */
2178 imm = (imm & 0xf) - 0x10;
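/* Informal example: for IMM == 0x3800 the loop above stops with the
   low set bit at position 11 and the value 7 left in IMM; 7 is a
   non-negative 5-bit field, so LEN becomes 4 and the constant can be
   rebuilt by depositing that field shifted left by 11 (0x3800 == 7 << 11),
   which is exactly the class of constants zdepi_cint_p accepts. */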
2186 /* Compute position (in OP[1]) and width (in OP[2])
2187 useful for copying IMM to a register using the depdi,z
2188 instructions. Store the immediate value to insert in OP[0]. */
2190 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2192 HOST_WIDE_INT lsb, len;
2194 /* Find the least significant set bit in IMM. */
2195 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2202 /* Choose variants based on *sign* of the 5-bit field. */
2203 if ((imm & 0x10) == 0)
2204 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2205 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2208 /* Find the width of the bitstring in IMM. */
2209 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2211 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2215 /* Sign extend IMM as a 5-bit value. */
2216 imm = (imm & 0xf) - 0x10;
2224 /* Output assembler code to perform a doubleword move insn
2225 with operands OPERANDS. */
2228 output_move_double (rtx *operands)
2230 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2232 rtx addreg0 = 0, addreg1 = 0;
2234 /* First classify both operands. */
2236 if (REG_P (operands[0]))
2238 else if (offsettable_memref_p (operands[0]))
2240 else if (GET_CODE (operands[0]) == MEM)
2245 if (REG_P (operands[1]))
2247 else if (CONSTANT_P (operands[1]))
2249 else if (offsettable_memref_p (operands[1]))
2251 else if (GET_CODE (operands[1]) == MEM)
2256 /* Check for the cases that the operand constraints are not
2257 supposed to allow to happen. */
2258 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2260 /* Handle copies between general and floating registers. */
2262 if (optype0 == REGOP && optype1 == REGOP
2263 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2265 if (FP_REG_P (operands[0]))
2267 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2268 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2269 return "{fldds|fldd} -16(%%sp),%0";
2273 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2274 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2275 return "{ldws|ldw} -12(%%sp),%R0";
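/* There is no direct copy path between the PA's general and floating
   point register files, so the 8-byte value above makes a round trip
   through memory, using the scratch words at sp-16 and sp-12. */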
2279 /* Handle auto decrementing and incrementing loads and stores
2280 specifically, since the structure of the function doesn't work
2281 for them without major modification. Do it better when this port
2282 is taught about the general inc/dec addressing of the PA.
2283 (This was written by tege. Chide him if it doesn't work.) */
2285 if (optype0 == MEMOP)
2287 /* We have to output the address syntax ourselves, since print_operand
2288 doesn't deal with the addresses we want to use. Fix this later. */
2290 rtx addr = XEXP (operands[0], 0);
2291 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2293 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2295 operands[0] = XEXP (addr, 0);
2296 gcc_assert (GET_CODE (operands[1]) == REG
2297 && GET_CODE (operands[0]) == REG);
2299 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2301 /* No overlap between high target register and address
2302 register. (We do this in a non-obvious way to
2303 save a register file writeback) */
2304 if (GET_CODE (addr) == POST_INC)
2305 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2306 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2308 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2310 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2312 operands[0] = XEXP (addr, 0);
2313 gcc_assert (GET_CODE (operands[1]) == REG
2314 && GET_CODE (operands[0]) == REG);
2316 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2317 /* No overlap between high target register and address
2318 register. (We do this in a non-obvious way to save a
2319 register file writeback) */
2320 if (GET_CODE (addr) == PRE_INC)
2321 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2322 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
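/* Informal note on the non-obvious sequences above: for the 8-byte
   POST_INC store, the first stw,ma writes the first word of the pair
   at the old address and bumps the base by 8 in the same instruction,
   and the second store then lands at -4 off the updated base (old
   address + 4), so only one base-register writeback is needed. */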
2325 if (optype1 == MEMOP)
2327 /* We have to output the address syntax ourselves, since print_operand
2328 doesn't deal with the addresses we want to use. Fix this later. */
2330 rtx addr = XEXP (operands[1], 0);
2331 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2333 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2335 operands[1] = XEXP (addr, 0);
2336 gcc_assert (GET_CODE (operands[0]) == REG
2337 && GET_CODE (operands[1]) == REG);
2339 if (!reg_overlap_mentioned_p (high_reg, addr))
2341 /* No overlap between high target register and address
2342 register. (We do this in a non-obvious way to
2343 save a register file writeback) */
2344 if (GET_CODE (addr) == POST_INC)
2345 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2346 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2350 /* This is an undefined situation. We should load into the
2351 address register *and* update that register. Probably
2352 we don't need to handle this at all. */
2353 if (GET_CODE (addr) == POST_INC)
2354 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2355 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2358 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2360 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2362 operands[1] = XEXP (addr, 0);
2363 gcc_assert (GET_CODE (operands[0]) == REG
2364 && GET_CODE (operands[1]) == REG);
2366 if (!reg_overlap_mentioned_p (high_reg, addr))
2368 /* No overlap between high target register and address
2369 register. (We do this in a non-obvious way to
2370 save a register file writeback) */
2371 if (GET_CODE (addr) == PRE_INC)
2372 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2373 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2377 /* This is an undefined situation. We should load into the
2378 address register *and* update that register. Probably
2379 we don't need to handle this at all. */
2380 if (GET_CODE (addr) == PRE_INC)
2381 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2382 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2385 else if (GET_CODE (addr) == PLUS
2386 && GET_CODE (XEXP (addr, 0)) == MULT)
2389 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2391 if (!reg_overlap_mentioned_p (high_reg, addr))
2393 xoperands[0] = high_reg;
2394 xoperands[1] = XEXP (addr, 1);
2395 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2396 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2397 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2399 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2403 xoperands[0] = high_reg;
2404 xoperands[1] = XEXP (addr, 1);
2405 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2406 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2407 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2409 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2414 /* If an operand is an unoffsettable memory ref, find a register
2415 we can increment temporarily to make it refer to the second word. */
2417 if (optype0 == MEMOP)
2418 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2420 if (optype1 == MEMOP)
2421 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2423 /* Ok, we can do one word at a time.
2424 Normally we do the low-numbered word first.
2426 In either case, set up in LATEHALF the operands to use
2427 for the high-numbered word and in some cases alter the
2428 operands in OPERANDS to be suitable for the low-numbered word. */
2430 if (optype0 == REGOP)
2431 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2432 else if (optype0 == OFFSOP)
2433 latehalf[0] = adjust_address (operands[0], SImode, 4);
2435 latehalf[0] = operands[0];
2437 if (optype1 == REGOP)
2438 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2439 else if (optype1 == OFFSOP)
2440 latehalf[1] = adjust_address (operands[1], SImode, 4);
2441 else if (optype1 == CNSTOP)
2442 split_double (operands[1], &operands[1], &latehalf[1]);
2444 latehalf[1] = operands[1];
2446 /* If the first move would clobber the source of the second one,
2447 do them in the other order.
2449 This can happen in two cases:
2451 mem -> register where the first half of the destination register
2452 is the same register used in the memory's address. Reload
2453 can create such insns.
2455 mem in this case will be either register indirect or register
2456 indirect plus a valid offset.
2458 register -> register move where REGNO(dst) == REGNO(src + 1)
2459 someone (Tim/Tege?) claimed this can happen for parameter loads.
2461 Handle mem -> register case first. */
2462 if (optype0 == REGOP
2463 && (optype1 == MEMOP || optype1 == OFFSOP)
2464 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2467 /* Do the late half first. */
2469 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2470 output_asm_insn (singlemove_string (latehalf), latehalf);
2474 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2475 return singlemove_string (operands);
2478 /* Now handle register -> register case. */
2479 if (optype0 == REGOP && optype1 == REGOP
2480 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2482 output_asm_insn (singlemove_string (latehalf), latehalf);
2483 return singlemove_string (operands);
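/* Informal example of the overlap case above: copying the register
   pair (r5,r6) into (r6,r7) must emit the high-half move (r7 = r6)
   before the low-half move (r6 = r5), since doing the low half first
   would clobber r6 while it is still needed as a source. */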
2486 /* Normal case: do the two words, low-numbered first. */
2488 output_asm_insn (singlemove_string (operands), operands);
2490 /* Make any unoffsettable addresses point at high-numbered word. */
2492 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2494 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2497 output_asm_insn (singlemove_string (latehalf), latehalf);
2499 /* Undo the adds we just did. */
2501 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2503 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2509 output_fp_move_double (rtx *operands)
2511 if (FP_REG_P (operands[0]))
2513 if (FP_REG_P (operands[1])
2514 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2515 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2517 output_asm_insn ("fldd%F1 %1,%0", operands);
2519 else if (FP_REG_P (operands[1]))
2521 output_asm_insn ("fstd%F0 %1,%0", operands);
2527 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2529 /* This is a pain. You have to be prepared to deal with an
2530 arbitrary address here including pre/post increment/decrement.
2532 So avoid this in the MD. */
2533 gcc_assert (GET_CODE (operands[0]) == REG);
2535 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2536 xoperands[0] = operands[0];
2537 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2542 /* Return a REG that occurs in ADDR with coefficient 1.
2543 ADDR can be effectively incremented by incrementing REG. */
2546 find_addr_reg (rtx addr)
2548 while (GET_CODE (addr) == PLUS)
2550 if (GET_CODE (XEXP (addr, 0)) == REG)
2551 addr = XEXP (addr, 0);
2552 else if (GET_CODE (XEXP (addr, 1)) == REG)
2553 addr = XEXP (addr, 1);
2554 else if (CONSTANT_P (XEXP (addr, 0)))
2555 addr = XEXP (addr, 1);
2556 else if (CONSTANT_P (XEXP (addr, 1)))
2557 addr = XEXP (addr, 0);
2561 gcc_assert (GET_CODE (addr) == REG);
2565 /* Emit code to perform a block move.
2567 OPERANDS[0] is the destination pointer as a REG, clobbered.
2568 OPERANDS[1] is the source pointer as a REG, clobbered.
2569 OPERANDS[2] is a register for temporary storage.
2570 OPERANDS[3] is a register for temporary storage.
2571 OPERANDS[4] is the size as a CONST_INT
2572 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2573 OPERANDS[6] is another temporary register. */
2576 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2578 int align = INTVAL (operands[5]);
2579 unsigned long n_bytes = INTVAL (operands[4]);
2581 /* We can't move more than a word at a time because the PA
2582 has no integer move insns wider than a word. (Could use fp mem ops?) */
2583 if (align > (TARGET_64BIT ? 8 : 4))
2584 align = (TARGET_64BIT ? 8 : 4);
2586 /* Note that we know each loop below will execute at least twice
2587 (else we would have open-coded the copy). */
2591 /* Pre-adjust the loop counter. */
2592 operands[4] = GEN_INT (n_bytes - 16);
2593 output_asm_insn ("ldi %4,%2", operands);
2596 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2597 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2598 output_asm_insn ("std,ma %3,8(%0)", operands);
2599 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2600 output_asm_insn ("std,ma %6,8(%0)", operands);
2602 /* Handle the residual. There could be up to 15 bytes of
2603 residual to copy! */
2604 if (n_bytes % 16 != 0)
2606 operands[4] = GEN_INT (n_bytes % 8);
2607 if (n_bytes % 16 >= 8)
2608 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2609 if (n_bytes % 8 != 0)
2610 output_asm_insn ("ldd 0(%1),%6", operands);
2611 if (n_bytes % 16 >= 8)
2612 output_asm_insn ("std,ma %3,8(%0)", operands);
2613 if (n_bytes % 8 != 0)
2614 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2619 /* Pre-adjust the loop counter. */
2620 operands[4] = GEN_INT (n_bytes - 8);
2621 output_asm_insn ("ldi %4,%2", operands);
2624 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2625 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2626 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2627 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2628 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2630 /* Handle the residual. There could be up to 7 bytes of
2631 residual to copy! */
2632 if (n_bytes % 8 != 0)
2634 operands[4] = GEN_INT (n_bytes % 4);
2635 if (n_bytes % 8 >= 4)
2636 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2637 if (n_bytes % 4 != 0)
2638 output_asm_insn ("ldw 0(%1),%6", operands);
2639 if (n_bytes % 8 >= 4)
2640 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2641 if (n_bytes % 4 != 0)
2642 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
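/* Informal example: a 23-byte copy with word alignment pre-loads the
   counter with 15, runs the unrolled loop above twice (16 bytes), then
   the residual code moves one more word and uses stby,e to finish the
   final 3 bytes. */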
2647 /* Pre-adjust the loop counter. */
2648 operands[4] = GEN_INT (n_bytes - 4);
2649 output_asm_insn ("ldi %4,%2", operands);
2652 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2653 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2654 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2655 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2656 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2658 /* Handle the residual. */
2659 if (n_bytes % 4 != 0)
2661 if (n_bytes % 4 >= 2)
2662 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2663 if (n_bytes % 2 != 0)
2664 output_asm_insn ("ldb 0(%1),%6", operands);
2665 if (n_bytes % 4 >= 2)
2666 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2667 if (n_bytes % 2 != 0)
2668 output_asm_insn ("stb %6,0(%0)", operands);
2673 /* Pre-adjust the loop counter. */
2674 operands[4] = GEN_INT (n_bytes - 2);
2675 output_asm_insn ("ldi %4,%2", operands);
2678 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2679 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2680 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2681 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2682 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2684 /* Handle the residual. */
2685 if (n_bytes % 2 != 0)
2687 output_asm_insn ("ldb 0(%1),%3", operands);
2688 output_asm_insn ("stb %3,0(%0)", operands);
2697 /* Count the number of insns necessary to handle this block move.
2699 Basic structure is the same as output_block_move, except that we
2700 count insns rather than emit them. */
2703 compute_movmem_length (rtx insn)
2705 rtx pat = PATTERN (insn);
2706 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2707 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2708 unsigned int n_insns = 0;
2710 /* We can't move more than a word at a time because the PA
2711 has no integer move insns wider than a word. (Could use fp mem ops?) */
2712 if (align > (TARGET_64BIT ? 8 : 4))
2713 align = (TARGET_64BIT ? 8 : 4);
2715 /* The basic copying loop. */
2719 if (n_bytes % (2 * align) != 0)
2721 if ((n_bytes % (2 * align)) >= align)
2724 if ((n_bytes % align) != 0)
2728 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
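/* Sanity check against the example given with output_block_move: the
   23-byte, word-aligned copy costs the 6-insn basic loop (ldi plus the
   5-insn copying loop) plus two 2-insn residual pieces, i.e. 10 insns
   or 40 bytes, matching what output_block_move emits for that case. */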
2732 /* Emit code to perform a block clear.
2734 OPERANDS[0] is the destination pointer as a REG, clobbered.
2735 OPERANDS[1] is a register for temporary storage.
2736 OPERANDS[2] is the size as a CONST_INT
2737 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2740 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2742 int align = INTVAL (operands[3]);
2743 unsigned long n_bytes = INTVAL (operands[2]);
2745 /* We can't clear more than a word at a time because the PA
2746 has no integer move insns wider than a word. */
2747 if (align > (TARGET_64BIT ? 8 : 4))
2748 align = (TARGET_64BIT ? 8 : 4);
2750 /* Note that we know each loop below will execute at least twice
2751 (else we would have open-coded the copy). */
2755 /* Pre-adjust the loop counter. */
2756 operands[2] = GEN_INT (n_bytes - 16);
2757 output_asm_insn ("ldi %2,%1", operands);
2760 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2761 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2762 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2764 /* Handle the residual. There could be up to 15 bytes of
2765 residual to clear! */
2766 if (n_bytes % 16 != 0)
2768 operands[2] = GEN_INT (n_bytes % 8);
2769 if (n_bytes % 16 >= 8)
2770 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2771 if (n_bytes % 8 != 0)
2772 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2777 /* Pre-adjust the loop counter. */
2778 operands[2] = GEN_INT (n_bytes - 8);
2779 output_asm_insn ("ldi %2,%1", operands);
2782 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2783 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2784 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2786 /* Handle the residual. There could be up to 7 bytes of
2787 residual to clear! */
2788 if (n_bytes % 8 != 0)
2790 operands[2] = GEN_INT (n_bytes % 4);
2791 if (n_bytes % 8 >= 4)
2792 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2793 if (n_bytes % 4 != 0)
2794 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2799 /* Pre-adjust the loop counter. */
2800 operands[2] = GEN_INT (n_bytes - 4);
2801 output_asm_insn ("ldi %2,%1", operands);
2804 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2805 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2806 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2808 /* Handle the residual. */
2809 if (n_bytes % 4 != 0)
2811 if (n_bytes % 4 >= 2)
2812 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2813 if (n_bytes % 2 != 0)
2814 output_asm_insn ("stb %%r0,0(%0)", operands);
2819 /* Pre-adjust the loop counter. */
2820 operands[2] = GEN_INT (n_bytes - 2);
2821 output_asm_insn ("ldi %2,%1", operands);
2824 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2825 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2826 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2828 /* Handle the residual. */
2829 if (n_bytes % 2 != 0)
2830 output_asm_insn ("stb %%r0,0(%0)", operands);
2839 /* Count the number of insns necessary to handle this block clear.
2841 Basic structure is the same as output_block_clear, except that we
2842 count insns rather than emit them. */
2845 compute_clrmem_length (rtx insn)
2847 rtx pat = PATTERN (insn);
2848 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2849 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2850 unsigned int n_insns = 0;
2852 /* We can't clear more than a word at a time because the PA
2853 has no integer move insns wider than a word. */
2854 if (align > (TARGET_64BIT ? 8 : 4))
2855 align = (TARGET_64BIT ? 8 : 4);
2857 /* The basic loop. */
2861 if (n_bytes % (2 * align) != 0)
2863 if ((n_bytes % (2 * align)) >= align)
2866 if ((n_bytes % align) != 0)
2870 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2876 output_and (rtx *operands)
2878 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2880 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2881 int ls0, ls1, ms0, p, len;
2883 for (ls0 = 0; ls0 < 32; ls0++)
2884 if ((mask & (1 << ls0)) == 0)
2887 for (ls1 = ls0; ls1 < 32; ls1++)
2888 if ((mask & (1 << ls1)) != 0)
2891 for (ms0 = ls1; ms0 < 32; ms0++)
2892 if ((mask & (1 << ms0)) == 0)
2895 gcc_assert (ms0 == 32);
2903 operands[2] = GEN_INT (len);
2904 return "{extru|extrw,u} %1,31,%2,%0";
2908 /* We could use this `depi' for the case above as well, but `depi'
2909 requires one more register file access than an `extru'. */
2914 operands[2] = GEN_INT (p);
2915 operands[3] = GEN_INT (len);
2916 return "{depi|depwi} 0,%2,%3,%0";
2920 return "and %1,%2,%0";
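/* Informal examples: a mask of 0x000000ff (ones only at the bottom)
   takes the extrw,u path and simply zero-extends the low byte, while
   0xffff00ff has a single 8-bit hole (bits 8-15) and takes the depwi
   path, which clears that field in place. */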
2923 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2924 storing the result in operands[0]. */
2926 output_64bit_and (rtx *operands)
2928 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2930 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2931 int ls0, ls1, ms0, p, len;
2933 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2934 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2937 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2938 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2941 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2942 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2945 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2947 if (ls1 == HOST_BITS_PER_WIDE_INT)
2953 operands[2] = GEN_INT (len);
2954 return "extrd,u %1,63,%2,%0";
2958 /* We could use this `depdi' for the case above as well, but `depdi'
2959 requires one more register file access than an `extrd'. */
2964 operands[2] = GEN_INT (p);
2965 operands[3] = GEN_INT (len);
2966 return "depdi 0,%2,%3,%0";
2970 return "and %1,%2,%0";
2974 output_ior (rtx *operands)
2976 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2977 int bs0, bs1, p, len;
2979 if (INTVAL (operands[2]) == 0)
2980 return "copy %1,%0";
2982 for (bs0 = 0; bs0 < 32; bs0++)
2983 if ((mask & (1 << bs0)) != 0)
2986 for (bs1 = bs0; bs1 < 32; bs1++)
2987 if ((mask & (1 << bs1)) == 0)
2990 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2995 operands[2] = GEN_INT (p);
2996 operands[3] = GEN_INT (len);
2997 return "{depi|depwi} -1,%2,%3,%0";
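/* Informal example: a mask of 0x00000f00 is one contiguous run of set
   bits (bits 8-11), so the depwi above just sets those four bits of
   the destination. */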
3000 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3001 storing the result in operands[0]. */
3003 output_64bit_ior (rtx *operands)
3005 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3006 int bs0, bs1, p, len;
3008 if (INTVAL (operands[2]) == 0)
3009 return "copy %1,%0";
3011 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3012 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3015 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3016 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3019 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3020 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3025 operands[2] = GEN_INT (p);
3026 operands[3] = GEN_INT (len);
3027 return "depdi -1,%2,%3,%0";
3030 /* Target hook for assembling integer objects. This code handles
3031 aligned SI and DI integers specially since function references
3032 must be preceded by P%. */
3035 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3037 if (size == UNITS_PER_WORD
3039 && function_label_operand (x, VOIDmode))
3041 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3042 output_addr_const (asm_out_file, x);
3043 fputc ('\n', asm_out_file);
3046 return default_assemble_integer (x, size, aligned_p);
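/* The P% prefix above asks the assembler to emit a plabel (procedure
   label, i.e. function descriptor) reference rather than a raw code
   address, which is what function pointers are expected to hold under
   the SOM and HP-UX calling conventions. */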
3049 /* Output an ascii string. */
3051 output_ascii (FILE *file, const char *p, int size)
3055 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3057 /* The HP assembler can only take strings of 256 characters at one
3058 time. This is a limitation on input line length, *not* the
3059 length of the string. Sigh. Even worse, it seems that the
3060 restriction is in number of input characters (see \xnn &
3061 \whatever). So we have to do this very carefully. */
3063 fputs ("\t.STRING \"", file);
3066 for (i = 0; i < size; i += 4)
3070 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3072 register unsigned int c = (unsigned char) p[i + io];
3074 if (c == '\"' || c == '\\')
3075 partial_output[co++] = '\\';
3076 if (c >= ' ' && c < 0177)
3077 partial_output[co++] = c;
3081 partial_output[co++] = '\\';
3082 partial_output[co++] = 'x';
3083 hexd = c / 16 + '0';
3085 hexd -= '9' - 'a' + 1;
3086 partial_output[co++] = hexd;
3087 hexd = c % 16 + '0';
3089 hexd -= '9' - 'a' + 1;
3090 partial_output[co++] = hexd;
3093 if (chars_output + co > 243)
3095 fputs ("\"\n\t.STRING \"", file);
3098 fwrite (partial_output, 1, (size_t) co, file);
3102 fputs ("\"\n", file);
3105 /* Try to rewrite floating point comparisons & branches to avoid
3106 useless add,tr insns.
3108 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3109 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3110 first attempt to remove useless add,tr insns. It is zero
3111 for the second pass as reorg sometimes leaves bogus REG_DEAD
3114 When CHECK_NOTES is zero we can only eliminate add,tr insns
3115 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3118 remove_useless_addtr_insns (int check_notes)
3121 static int pass = 0;
3123 /* This is fairly cheap, so always run it when optimizing. */
3127 int fbranch_count = 0;
3129 /* Walk all the insns in this function looking for fcmp & fbranch
3130 instructions. Keep track of how many of each we find. */
3131 for (insn = get_insns (); insn; insn = next_insn (insn))
3135 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3136 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3139 tmp = PATTERN (insn);
3141 /* It must be a set. */
3142 if (GET_CODE (tmp) != SET)
3145 /* If the destination is CCFP, then we've found an fcmp insn. */
3146 tmp = SET_DEST (tmp);
3147 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3153 tmp = PATTERN (insn);
3154 /* If this is an fbranch instruction, bump the fbranch counter. */
3155 if (GET_CODE (tmp) == SET
3156 && SET_DEST (tmp) == pc_rtx
3157 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3158 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3159 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3160 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3168 /* Find all floating point compare + branch insns. If possible,
3169 reverse the comparison & the branch to avoid add,tr insns. */
3170 for (insn = get_insns (); insn; insn = next_insn (insn))
3174 /* Ignore anything that isn't an INSN. */
3175 if (GET_CODE (insn) != INSN)
3178 tmp = PATTERN (insn);
3180 /* It must be a set. */
3181 if (GET_CODE (tmp) != SET)
3184 /* The destination must be CCFP, which is register zero. */
3185 tmp = SET_DEST (tmp);
3186 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3189 /* INSN should be a set of CCFP.
3191 See if the result of this insn is used in a reversed FP
3192 conditional branch. If so, reverse our condition and
3193 the branch. Doing so avoids useless add,tr insns. */
3194 next = next_insn (insn);
3197 /* Jumps, calls and labels stop our search. */
3198 if (GET_CODE (next) == JUMP_INSN
3199 || GET_CODE (next) == CALL_INSN
3200 || GET_CODE (next) == CODE_LABEL)
3203 /* As does another fcmp insn. */
3204 if (GET_CODE (next) == INSN
3205 && GET_CODE (PATTERN (next)) == SET
3206 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3207 && REGNO (SET_DEST (PATTERN (next))) == 0)
3210 next = next_insn (next);
3213 /* Is NEXT_INSN a branch? */
3215 && GET_CODE (next) == JUMP_INSN)
3217 rtx pattern = PATTERN (next);
3219 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3220 and CCFP dies, then reverse our conditional and the branch
3221 to avoid the add,tr. */
3222 if (GET_CODE (pattern) == SET
3223 && SET_DEST (pattern) == pc_rtx
3224 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3225 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3226 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3227 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3228 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3229 && (fcmp_count == fbranch_count
3231 && find_regno_note (next, REG_DEAD, 0))))
3233 /* Reverse the branch. */
3234 tmp = XEXP (SET_SRC (pattern), 1);
3235 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3236 XEXP (SET_SRC (pattern), 2) = tmp;
3237 INSN_CODE (next) = -1;
3239 /* Reverse our condition. */
3240 tmp = PATTERN (insn);
3241 PUT_CODE (XEXP (tmp, 1),
3242 (reverse_condition_maybe_unordered
3243 (GET_CODE (XEXP (tmp, 1)))));
3253 /* You may have trouble believing this, but this is the 32 bit HP-PA stack layout.
3258 Variable arguments (optional; any number may be allocated)
3260 SP-(4*(N+9)) arg word N
3265 Fixed arguments (must be allocated; may remain unused)
3274 SP-32 External Data Pointer (DP)
3276 SP-24 External/stub RP (RP')
3280 SP-8 Calling Stub RP (RP'')
3285 SP-0 Stack Pointer (points to next available address)
3289 /* This function saves registers as follows. Registers marked with ' are
3290 this function's registers (as opposed to the previous function's).
3291 If a frame_pointer isn't needed, r4 is saved as a general register;
3292 the space for the frame pointer is still allocated, though, to keep
3298 SP (FP') Previous FP
3299 SP + 4 Alignment filler (sigh)
3300 SP + 8 Space for locals reserved here.
3304 SP + n All call-saved registers used.
3308 SP + o All call-saved fp registers used.
3312 SP + p (SP') points to next available address.
3316 /* Global variables set by output_function_prologue(). */
3317 /* Size of frame. Need to know this to emit return insns from
3319 static HOST_WIDE_INT actual_fsize, local_fsize;
3320 static int save_fregs;
3322 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3323 Handle case where DISP > 8k by using the add_high_const patterns.
3325 Note in DISP > 8k case, we will leave the high part of the address
3326 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3329 store_reg (int reg, HOST_WIDE_INT disp, int base)
3331 rtx insn, dest, src, basereg;
3333 src = gen_rtx_REG (word_mode, reg);
3334 basereg = gen_rtx_REG (Pmode, base);
3335 if (VAL_14_BITS_P (disp))
3337 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3338 insn = emit_move_insn (dest, src);
3340 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3342 rtx delta = GEN_INT (disp);
3343 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3345 emit_move_insn (tmpreg, delta);
3346 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3350 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3351 gen_rtx_SET (VOIDmode, tmpreg,
3352 gen_rtx_PLUS (Pmode, basereg, delta)),
3354 RTX_FRAME_RELATED_P (insn) = 1;
3356 dest = gen_rtx_MEM (word_mode, tmpreg);
3357 insn = emit_move_insn (dest, src);
3361 rtx delta = GEN_INT (disp);
3362 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3363 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3365 emit_move_insn (tmpreg, high);
3366 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3367 insn = emit_move_insn (dest, src);
3371 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3372 gen_rtx_SET (VOIDmode,
3373 gen_rtx_MEM (word_mode,
3374 gen_rtx_PLUS (word_mode, basereg,
3382 RTX_FRAME_RELATED_P (insn) = 1;
3385 /* Emit RTL to store REG at the memory location specified by BASE and then
3386 add MOD to BASE. MOD must be <= 8k. */
3389 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3391 rtx insn, basereg, srcreg, delta;
3393 gcc_assert (VAL_14_BITS_P (mod));
3395 basereg = gen_rtx_REG (Pmode, base);
3396 srcreg = gen_rtx_REG (word_mode, reg);
3397 delta = GEN_INT (mod);
3399 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3402 RTX_FRAME_RELATED_P (insn) = 1;
3404 /* RTX_FRAME_RELATED_P must be set on each frame related set
3405 in a parallel with more than one element. */
3406 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3407 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
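/* The post_store pattern used above produces a single stwm/std,ma
   style instruction: the register is stored at *BASE and BASE is
   advanced by MOD in the same insn, which is how the prologue saves
   the old frame pointer (or the first callee save) while allocating
   the frame in one step. */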
3411 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3412 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3413 whether to add a frame note or not.
3415 In the DISP > 8k case, we leave the high part of the address in %r1.
3416 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3419 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3423 if (VAL_14_BITS_P (disp))
3425 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3426 plus_constant (gen_rtx_REG (Pmode, base), disp));
3428 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3430 rtx basereg = gen_rtx_REG (Pmode, base);
3431 rtx delta = GEN_INT (disp);
3432 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3434 emit_move_insn (tmpreg, delta);
3435 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3436 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3439 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3440 gen_rtx_SET (VOIDmode, tmpreg,
3441 gen_rtx_PLUS (Pmode, basereg, delta)),
3446 rtx basereg = gen_rtx_REG (Pmode, base);
3447 rtx delta = GEN_INT (disp);
3448 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3450 emit_move_insn (tmpreg,
3451 gen_rtx_PLUS (Pmode, basereg,
3452 gen_rtx_HIGH (Pmode, delta)));
3453 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3454 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3457 if (DO_FRAME_NOTES && note)
3458 RTX_FRAME_RELATED_P (insn) = 1;
3462 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3467 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3468 be consistent with the rounding and size calculation done here.
3469 Change them at the same time. */
3471 /* We do our own stack alignment. First, round the size of the
3472 stack locals up to a word boundary. */
3473 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3475 /* Space for previous frame pointer + filler. If any frame is
3476 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3477 waste some space here for the sake of HP compatibility. The
3478 first slot is only used when the frame pointer is needed. */
3479 if (size || frame_pointer_needed)
3480 size += STARTING_FRAME_OFFSET;
3482 /* If the current function calls __builtin_eh_return, then we need
3483 to allocate stack space for registers that will hold data for
3484 the exception handler. */
3485 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3489 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3491 size += i * UNITS_PER_WORD;
3494 /* Account for space used by the callee general register saves. */
3495 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3496 if (df_regs_ever_live_p (i))
3497 size += UNITS_PER_WORD;
3499 /* Account for space used by the callee floating point register saves. */
3500 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3501 if (df_regs_ever_live_p (i)
3502 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3506 /* We always save both halves of the FP register, so always
3507 increment the frame size by 8 bytes. */
3511 /* If any of the floating registers are saved, account for the
3512 alignment needed for the floating point register save block. */
3515 size = (size + 7) & ~7;
3520 /* The various ABIs include space for the outgoing parameters in the
3521 size of the current function's stack frame. We don't need to align
3522 for the outgoing arguments as their alignment is set by the final
3523 rounding for the frame as a whole. */
3524 size += current_function_outgoing_args_size;
3526 /* Allocate space for the fixed frame marker. This space must be
3527 allocated for any function that makes calls or allocates
3529 if (!current_function_is_leaf || size)
3530 size += TARGET_64BIT ? 48 : 32;
3532 /* Finally, round to the preferred stack boundary. */
3533 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3534 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
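/* Informal 32-bit example, assuming the usual values of
   STARTING_FRAME_OFFSET (8) and a 64-byte preferred stack boundary:
   40 bytes of locals, no register saves and 16 bytes of outgoing
   arguments in a non-leaf function give 40 + 8 + 16 + 32 = 96, which
   the final rounding turns into a 128-byte frame. */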
3537 /* Generate the assembly code for function entry. FILE is a stdio
3538 stream to output the code to. SIZE is an int: how many units of
3539 temporary storage to allocate.
3541 Refer to the array `regs_ever_live' to determine which registers to
3542 save; `regs_ever_live[I]' is nonzero if register number I is ever
3543 used in the function. This function is responsible for knowing
3544 which registers should not be saved even if used. */
3546 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3547 of memory. If any fpu reg is used in the function, we allocate
3548 such a block here, at the bottom of the frame, just in case it's needed.
3550 If this function is a leaf procedure, then we may choose not
3551 to do a "save" insn. The decision about whether or not
3552 to do this is made in regclass.c. */
3555 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3557 /* The function's label and associated .PROC must never be
3558 separated and must be output *after* any profiling declarations
3559 to avoid changing spaces/subspaces within a procedure. */
3560 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3561 fputs ("\t.PROC\n", file);
3563 /* hppa_expand_prologue does the dirty work now. We just need
3564 to output the assembler directives which denote the start
3566 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3567 if (df_regs_ever_live_p (2))
3568 fputs (",CALLS,SAVE_RP", file);
3570 fputs (",NO_CALLS", file);
3572 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3573 at the beginning of the frame and that it is used as the frame
3574 pointer for the frame. We do this because our current frame
3575 layout doesn't conform to that specified in the HP runtime
3576 documentation and we need a way to indicate to programs such as
3577 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3578 isn't used by HP compilers but is supported by the assembler.
3579 However, SAVE_SP is supposed to indicate that the previous stack
3580 pointer has been saved in the frame marker. */
3581 if (frame_pointer_needed)
3582 fputs (",SAVE_SP", file);
3584 /* Pass on information about the number of callee register saves
3585 performed in the prologue.
3587 The compiler is supposed to pass the highest register number
3588 saved, the assembler then has to adjust that number before
3589 entering it into the unwind descriptor (to account for any
3590 caller saved registers with lower register numbers than the
3591 first callee saved register). */
3593 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3596 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3598 fputs ("\n\t.ENTRY\n", file);
3600 remove_useless_addtr_insns (0);
3604 hppa_expand_prologue (void)
3606 int merge_sp_adjust_with_store = 0;
3607 HOST_WIDE_INT size = get_frame_size ();
3608 HOST_WIDE_INT offset;
3616 /* Compute total size for frame pointer, filler, locals and rounding to
3617 the next word boundary. Similar code appears in compute_frame_size
3618 and must be changed in tandem with this code. */
3619 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3620 if (local_fsize || frame_pointer_needed)
3621 local_fsize += STARTING_FRAME_OFFSET;
3623 actual_fsize = compute_frame_size (size, &save_fregs);
3625 /* Compute a few things we will use often. */
3626 tmpreg = gen_rtx_REG (word_mode, 1);
3628 /* Save RP first. The calling conventions manual states RP will
3629 always be stored into the caller's frame at sp - 20 or sp - 16
3630 depending on which ABI is in use. */
3631 if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
3632 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3634 /* Allocate the local frame and set up the frame pointer if needed. */
3635 if (actual_fsize != 0)
3637 if (frame_pointer_needed)
3639 /* Copy the old frame pointer temporarily into %r1. Set up the
3640 new stack pointer, then store away the saved old frame pointer
3641 into the stack at sp and at the same time update the stack
3642 pointer by actual_fsize bytes. Two versions, first
3643 handles small (<8k) frames. The second handles large (>=8k)
3645 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3647 RTX_FRAME_RELATED_P (insn) = 1;
3649 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3651 RTX_FRAME_RELATED_P (insn) = 1;
3653 if (VAL_14_BITS_P (actual_fsize))
3654 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3657 /* It is incorrect to store the saved frame pointer at *sp,
3658 then increment sp (writes beyond the current stack boundary).
3660 So instead use stwm to store at *sp and post-increment the
3661 stack pointer as an atomic operation. Then increment sp to
3662 finish allocating the new frame. */
3663 HOST_WIDE_INT adjust1 = 8192 - 64;
3664 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3666 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3667 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3671 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3672 we need to store the previous stack pointer (frame pointer)
3673 into the frame marker on targets that use the HP unwind
3674 library. This allows the HP unwind library to be used to
3675 unwind GCC frames. However, we are not fully compatible
3676 with the HP library because our frame layout differs from
3677 that specified in the HP runtime specification.
3679 We don't want a frame note on this instruction as the frame
3680 marker moves during dynamic stack allocation.
3682 This instruction also serves as a blockage to prevent
3683 register spills from being scheduled before the stack
3684 pointer is raised. This is necessary as we store
3685 registers using the frame pointer as a base register,
3686 and the frame pointer is set before sp is raised. */
3687 if (TARGET_HPUX_UNWIND_LIBRARY)
3689 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3690 GEN_INT (TARGET_64BIT ? -8 : -4));
3692 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3696 emit_insn (gen_blockage ());
3698 /* no frame pointer needed. */
3701 /* In some cases we can perform the first callee register save
3702 and allocating the stack frame at the same time. If so, just
3703 make a note of it and defer allocating the frame until saving
3704 the callee registers. */
3705 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3706 merge_sp_adjust_with_store = 1;
3707 /* Cannot optimize. Adjust the stack frame by actual_fsize
3710 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3715 /* Normal register save.
3717 Do not save the frame pointer in the frame_pointer_needed case. It
3718 was done earlier. */
3719 if (frame_pointer_needed)
3721 offset = local_fsize;
3723 /* Saving the EH return data registers in the frame is the simplest
3724 way to get the frame unwind information emitted. We put them
3725 just before the general registers. */
3726 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3728 unsigned int i, regno;
3732 regno = EH_RETURN_DATA_REGNO (i);
3733 if (regno == INVALID_REGNUM)
3736 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3737 offset += UNITS_PER_WORD;
3741 for (i = 18; i >= 4; i--)
3742 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3744 store_reg (i, offset, FRAME_POINTER_REGNUM);
3745 offset += UNITS_PER_WORD;
3748 /* Account for %r3 which is saved in a special place. */
3751 /* No frame pointer needed. */
3754 offset = local_fsize - actual_fsize;
3756 /* Saving the EH return data registers in the frame is the simplest
3757 way to get the frame unwind information emitted. */
3758 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3760 unsigned int i, regno;
3764 regno = EH_RETURN_DATA_REGNO (i);
3765 if (regno == INVALID_REGNUM)
3768 /* If merge_sp_adjust_with_store is nonzero, then we can
3769 optimize the first save. */
3770 if (merge_sp_adjust_with_store)
3772 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3773 merge_sp_adjust_with_store = 0;
3776 store_reg (regno, offset, STACK_POINTER_REGNUM);
3777 offset += UNITS_PER_WORD;
3781 for (i = 18; i >= 3; i--)
3782 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3784 /* If merge_sp_adjust_with_store is nonzero, then we can
3785 optimize the first GR save. */
3786 if (merge_sp_adjust_with_store)
3788 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3789 merge_sp_adjust_with_store = 0;
3792 store_reg (i, offset, STACK_POINTER_REGNUM);
3793 offset += UNITS_PER_WORD;
3797 /* If we wanted to merge the SP adjustment with a GR save, but we never
3798 did any GR saves, then just emit the adjustment here. */
3799 if (merge_sp_adjust_with_store)
3800 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3804 /* The hppa calling conventions say that %r19, the pic offset
3805 register, is saved at sp - 32 (in this function's frame)
3806 when generating PIC code. FIXME: What is the correct thing
3807 to do for functions which make no calls and allocate no
3808 frame? Do we need to allocate a frame, or can we just omit
3809 the save? For now we'll just omit the save.
3811 We don't want a note on this insn as the frame marker can
3812 move if there is a dynamic stack allocation. */
3813 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3815 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3817 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3821 /* Align pointer properly (doubleword boundary). */
3822 offset = (offset + 7) & ~7;
3824 /* Floating point register store. */
3829 /* First get the frame or stack pointer to the start of the FP register
3831 if (frame_pointer_needed)
3833 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3834 base = frame_pointer_rtx;
3838 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3839 base = stack_pointer_rtx;
3842 /* Now actually save the FP registers. */
3843 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3845 if (df_regs_ever_live_p (i)
3846 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3848 rtx addr, insn, reg;
3849 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3850 reg = gen_rtx_REG (DFmode, i);
3851 insn = emit_move_insn (addr, reg);
3854 RTX_FRAME_RELATED_P (insn) = 1;
3857 rtx mem = gen_rtx_MEM (DFmode,
3858 plus_constant (base, offset));
3860 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3861 gen_rtx_SET (VOIDmode, mem, reg),
3866 rtx meml = gen_rtx_MEM (SFmode,
3867 plus_constant (base, offset));
3868 rtx memr = gen_rtx_MEM (SFmode,
3869 plus_constant (base, offset + 4));
3870 rtx regl = gen_rtx_REG (SFmode, i);
3871 rtx regr = gen_rtx_REG (SFmode, i + 1);
3872 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3873 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3876 RTX_FRAME_RELATED_P (setl) = 1;
3877 RTX_FRAME_RELATED_P (setr) = 1;
3878 vec = gen_rtvec (2, setl, setr);
3880 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3881 gen_rtx_SEQUENCE (VOIDmode, vec),
3885 offset += GET_MODE_SIZE (DFmode);
3892 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3893 Handle case where DISP > 8k by using the add_high_const patterns. */
3896 load_reg (int reg, HOST_WIDE_INT disp, int base)
3898 rtx dest = gen_rtx_REG (word_mode, reg);
3899 rtx basereg = gen_rtx_REG (Pmode, base);
3902 if (VAL_14_BITS_P (disp))
3903 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3904 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3906 rtx delta = GEN_INT (disp);
3907 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3909 emit_move_insn (tmpreg, delta);
3910 if (TARGET_DISABLE_INDEXING)
3912 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3913 src = gen_rtx_MEM (word_mode, tmpreg);
3916 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3920 rtx delta = GEN_INT (disp);
3921 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3922 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3924 emit_move_insn (tmpreg, high);
3925 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3928 emit_move_insn (dest, src);
3931 /* Update the total code bytes output to the text section. */
3934 update_total_code_bytes (int nbytes)
3936 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3937 && !IN_NAMED_SECTION_P (cfun->decl))
3939 if (INSN_ADDRESSES_SET_P ())
3941 unsigned long old_total = total_code_bytes;
3943 total_code_bytes += nbytes;
3945 /* Be prepared to handle overflows. */
3946 if (old_total > total_code_bytes)
3947 total_code_bytes = -1;
3950 total_code_bytes = -1;
3954 /* This function generates the assembly code for function exit.
3955 Args are as for output_function_prologue ().
3957 The function epilogue should not depend on the current stack
3958 pointer! It should use the frame pointer only. This is mandatory
3959 because of alloca; we also take advantage of it to omit stack
3960 adjustments before returning. */
3963 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3965 rtx insn = get_last_insn ();
3969 /* hppa_expand_epilogue does the dirty work now. We just need
3970 to output the assembler directives which denote the end
3973 To make debuggers happy, emit a nop if the epilogue was completely
3974 eliminated due to a volatile call as the last insn in the
3975 current function. That way the return address (in %r2) will
3976 always point to a valid instruction in the current function. */
3978 /* Get the last real insn. */
3979 if (GET_CODE (insn) == NOTE)
3980 insn = prev_real_insn (insn);
3982 /* If it is a sequence, then look inside. */
3983 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3984 insn = XVECEXP (PATTERN (insn), 0, 0);
3986 /* If insn is a CALL_INSN, then it must be a call to a volatile
3987 function (otherwise there would be epilogue insns). */
3988 if (insn && GET_CODE (insn) == CALL_INSN)
3990 fputs ("\tnop\n", file);
3994 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3996 if (TARGET_SOM && TARGET_GAS)
3998 /* We're done with this subspace except possibly for some additional
3999 debug information. Forget that we are in this subspace to ensure
4000 that the next function is output in its own subspace. */
4002 cfun->machine->in_nsubspa = 2;
4005 if (INSN_ADDRESSES_SET_P ())
4007 insn = get_last_nonnote_insn ();
4008 last_address += INSN_ADDRESSES (INSN_UID (insn));
4010 last_address += insn_default_length (insn);
4011 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4012 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4015 /* Finally, update the total number of code bytes output so far. */
4016 update_total_code_bytes (last_address);
4020 hppa_expand_epilogue (void)
4023 HOST_WIDE_INT offset;
4024 HOST_WIDE_INT ret_off = 0;
4026 int merge_sp_adjust_with_load = 0;
4028 /* We will use this often. */
4029 tmpreg = gen_rtx_REG (word_mode, 1);
4031 /* Try to restore RP early to avoid load/use interlocks when
4032 RP gets used in the return (bv) instruction. This appears to still
4033 be necessary even when we schedule the prologue and epilogue. */
4034 if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
4036 ret_off = TARGET_64BIT ? -16 : -20;
4037 if (frame_pointer_needed)
4039 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4044 /* No frame pointer, and stack is smaller than 8k. */
4045 if (VAL_14_BITS_P (ret_off - actual_fsize))
4047 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4053 /* General register restores. */
4054 if (frame_pointer_needed)
4056 offset = local_fsize;
4058 /* If the current function calls __builtin_eh_return, then we need
4059 to restore the saved EH data registers. */
4060 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4062 unsigned int i, regno;
4066 regno = EH_RETURN_DATA_REGNO (i);
4067 if (regno == INVALID_REGNUM)
4070 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4071 offset += UNITS_PER_WORD;
4075 for (i = 18; i >= 4; i--)
4076 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4078 load_reg (i, offset, FRAME_POINTER_REGNUM);
4079 offset += UNITS_PER_WORD;
4084 offset = local_fsize - actual_fsize;
4086 /* If the current function calls __builtin_eh_return, then we need
4087 to restore the saved EH data registers. */
4088 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4090 unsigned int i, regno;
4094 regno = EH_RETURN_DATA_REGNO (i);
4095 if (regno == INVALID_REGNUM)
4098 /* Only for the first load.
4099 merge_sp_adjust_with_load holds the register load
4100 with which we will merge the sp adjustment. */
4101 if (merge_sp_adjust_with_load == 0
4103 && VAL_14_BITS_P (-actual_fsize))
4104 merge_sp_adjust_with_load = regno;
4106 load_reg (regno, offset, STACK_POINTER_REGNUM);
4107 offset += UNITS_PER_WORD;
4111 for (i = 18; i >= 3; i--)
4113 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4115 /* Only for the first load.
4116 merge_sp_adjust_with_load holds the register load
4117 with which we will merge the sp adjustment. */
4118 if (merge_sp_adjust_with_load == 0
4120 && VAL_14_BITS_P (-actual_fsize))
4121 merge_sp_adjust_with_load = i;
4123 load_reg (i, offset, STACK_POINTER_REGNUM);
4124 offset += UNITS_PER_WORD;
4129 /* Align pointer properly (doubleword boundary). */
4130 offset = (offset + 7) & ~7;
4132 /* FP register restores. */
4135 /* Adjust the register to index off of. */
4136 if (frame_pointer_needed)
4137 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4139 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4141 /* Actually do the restores now. */
4142 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4143 if (df_regs_ever_live_p (i)
4144 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4146 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4147 rtx dest = gen_rtx_REG (DFmode, i);
4148 emit_move_insn (dest, src);
4152 /* Emit a blockage insn here to keep these insns from being moved to
4153 an earlier spot in the epilogue, or into the main instruction stream.
4155 This is necessary as we must not cut the stack back before all the
4156 restores are finished. */
4157 emit_insn (gen_blockage ());
4159 /* Reset stack pointer (and possibly frame pointer). The stack
4160 pointer is initially set to fp + 64 to avoid a race condition. */
4161 if (frame_pointer_needed)
4163 rtx delta = GEN_INT (-64);
4165 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4166 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4168 /* If we were deferring a callee register restore, do it now. */
4169 else if (merge_sp_adjust_with_load)
4171 rtx delta = GEN_INT (-actual_fsize);
4172 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4174 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4176 else if (actual_fsize != 0)
4177 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4180 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4181 frame greater than 8k), do so now. */
4183 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4185 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4187 rtx sa = EH_RETURN_STACKADJ_RTX;
4189 emit_insn (gen_blockage ());
4190 emit_insn (TARGET_64BIT
4191 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4192 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4197 hppa_pic_save_rtx (void)
4199 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4202 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4203 #define NO_DEFERRED_PROFILE_COUNTERS 0
4207 /* Vector of funcdef numbers. */
4208 static VEC(int,heap) *funcdef_nos;
4210 /* Output deferred profile counters. */
4212 output_deferred_profile_counters (void)
4217 if (VEC_empty (int, funcdef_nos))
4220 switch_to_section (data_section);
4221 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4222 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4224 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4226 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4227 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4230 VEC_free (int, heap, funcdef_nos);
4234 hppa_profile_hook (int label_no)
4236 /* We use SImode for the address of the function in both 32 and
4237 64-bit code to avoid having to provide DImode versions of the
4238 lcla2 and load_offset_label_address insn patterns. */
4239 rtx reg = gen_reg_rtx (SImode);
4240 rtx label_rtx = gen_label_rtx ();
4241 rtx begin_label_rtx, call_insn;
4242 char begin_label_name[16];
4244 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4246 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4249 emit_move_insn (arg_pointer_rtx,
4250 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4253 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4255 /* The address of the function is loaded into %r25 with an instruction-
4256 relative sequence that avoids the use of relocations. The sequence
4257 is split so that the load_offset_label_address instruction can
4258 occupy the delay slot of the call to _mcount. */
4260 emit_insn (gen_lcla2 (reg, label_rtx));
4262 emit_insn (gen_lcla1 (reg, label_rtx));
4264 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4265 reg, begin_label_rtx, label_rtx));
4267 #if !NO_DEFERRED_PROFILE_COUNTERS
4269 rtx count_label_rtx, addr, r24;
4270 char count_label_name[16];
4272 VEC_safe_push (int, heap, funcdef_nos, label_no);
4273 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4274 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4276 addr = force_reg (Pmode, count_label_rtx);
4277 r24 = gen_rtx_REG (Pmode, 24);
4278 emit_move_insn (r24, addr);
4281 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4282 gen_rtx_SYMBOL_REF (Pmode,
4284 GEN_INT (TARGET_64BIT ? 24 : 12)));
4286 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4291 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4292 gen_rtx_SYMBOL_REF (Pmode,
4294 GEN_INT (TARGET_64BIT ? 16 : 8)));
4298 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4299 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4301 /* Indicate the _mcount call cannot throw, nor will it execute a
4303 REG_NOTES (call_insn)
4304 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4307 /* Fetch the return address for the frame COUNT steps up from
4308 the current frame, after the prologue. FRAMEADDR is the
4309 frame pointer of the COUNT frame.
4311 We want to ignore any export stub remnants here. To handle this,
4312 we examine the code at the return address, and if it is an export
4313 stub, we return a memory rtx for the stub return address stored
4316 The value returned is used in two different ways:
4318 1. To find a function's caller.
4320 2. To change the return address for a function.
4322 This function handles most instances of case 1; however, it will
4323 fail if there are two levels of stubs to execute on the return
4324 path. The only way I believe that can happen is if the return value
4325 needs a parameter relocation, which never happens for C code.
4327 This function handles most instances of case 2; however, it will
4328 fail if we did not originally have stub code on the return path
4329 but will need stub code on the new return path. This can happen if
4330 the caller & callee are both in the main program, but the new
4331 return location is in a shared library. */
4334 return_addr_rtx (int count, rtx frameaddr)
4344 rp = get_hard_reg_initial_val (Pmode, 2);
4346 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4349 saved_rp = gen_reg_rtx (Pmode);
4350 emit_move_insn (saved_rp, rp);
4352 /* Get pointer to the instruction stream. We have to mask out the
4353 privilege level from the two low order bits of the return address
4354 pointer here so that ins will point to the start of the first
4355 instruction that would have been executed if we returned. */
4356 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4357 label = gen_label_rtx ();
4359 /* Check the instruction stream at the normal return address for the
4362 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4363 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4364 0x00011820 | stub+16: mtsp r1,sr0
4365 0xe0400002 | stub+20: be,n 0(sr0,rp)
4367 If it is an export stub, then our return address is really in
4370 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4371 NULL_RTX, SImode, 1);
4372 emit_jump_insn (gen_bne (label));
4374 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4375 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4376 emit_jump_insn (gen_bne (label));
4378 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4379 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4380 emit_jump_insn (gen_bne (label));
4382 /* 0xe0400002 must be specified as -532676606 so that it won't be
4383 rejected as an invalid immediate operand on 64-bit hosts. */
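/* As a quick check: 0xe0400002 taken as a signed 32-bit value is
   0xe0400002 - 0x100000000 = -532676606, so both spellings denote the
   same instruction word.  */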
4384 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4385 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);
4387 /* If there is no export stub then just use the value saved from
4388 the return pointer register. */
4390 emit_jump_insn (gen_bne (label));
4392 /* Here we know that our return address points to an export
4393 stub. We don't want to return the address of the export stub,
4394 but rather the return address of the export stub. That return
4395 address is stored at -24[frameaddr]. */
4397 emit_move_insn (saved_rp,
4399 memory_address (Pmode,
4400 plus_constant (frameaddr,
4408 emit_bcond_fp (enum rtx_code code, rtx operand0)
4410 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4411 gen_rtx_IF_THEN_ELSE (VOIDmode,
4412 gen_rtx_fmt_ee (code,
4414 gen_rtx_REG (CCFPmode, 0),
4416 gen_rtx_LABEL_REF (VOIDmode, operand0),
4422 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4424 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4425 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4428 /* Adjust the cost of a scheduling dependency. Return the new cost of
4429 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */
4432 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4434 enum attr_type attr_type;
4436 /* Don't adjust costs for a pa8000 chip; likewise, do not adjust any
4437 true dependencies, as they are described with bypasses now. */
4438 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4441 if (! recog_memoized (insn))
4444 attr_type = get_attr_type (insn);
4446 switch (REG_NOTE_KIND (link))
4449 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4452 if (attr_type == TYPE_FPLOAD)
4454 rtx pat = PATTERN (insn);
4455 rtx dep_pat = PATTERN (dep_insn);
4456 if (GET_CODE (pat) == PARALLEL)
4458 /* This happens for the fldXs,mb patterns. */
4459 pat = XVECEXP (pat, 0, 0);
4461 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4462 /* If this happens, we have to extend this to schedule
4463 optimally. Return 0 for now. */
4466 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4468 if (! recog_memoized (dep_insn))
4470 switch (get_attr_type (dep_insn))
4477 case TYPE_FPSQRTSGL:
4478 case TYPE_FPSQRTDBL:
4479 /* A fpload can't be issued until one cycle before a
4480 preceding arithmetic operation has finished if
4481 the target of the fpload is any of the sources
4482 (or destination) of the arithmetic operation. */
4483 return insn_default_latency (dep_insn) - 1;
4490 else if (attr_type == TYPE_FPALU)
4492 rtx pat = PATTERN (insn);
4493 rtx dep_pat = PATTERN (dep_insn);
4494 if (GET_CODE (pat) == PARALLEL)
4496 /* This happens for the fldXs,mb patterns. */
4497 pat = XVECEXP (pat, 0, 0);
4499 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4500 /* If this happens, we have to extend this to schedule
4501 optimally. Return 0 for now. */
4504 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4506 if (! recog_memoized (dep_insn))
4508 switch (get_attr_type (dep_insn))
4512 case TYPE_FPSQRTSGL:
4513 case TYPE_FPSQRTDBL:
4514 /* An ALU flop can't be issued until two cycles before a
4515 preceding divide or sqrt operation has finished if
4516 the target of the ALU flop is any of the sources
4517 (or destination) of the divide or sqrt operation. */
4518 return insn_default_latency (dep_insn) - 2;
4526 /* For other anti dependencies, the cost is 0. */
4529 case REG_DEP_OUTPUT:
4530 /* Output dependency; DEP_INSN writes a register that INSN writes some
4532 if (attr_type == TYPE_FPLOAD)
4534 rtx pat = PATTERN (insn);
4535 rtx dep_pat = PATTERN (dep_insn);
4536 if (GET_CODE (pat) == PARALLEL)
4538 /* This happens for the fldXs,mb patterns. */
4539 pat = XVECEXP (pat, 0, 0);
4541 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4542 /* If this happens, we have to extend this to schedule
4543 optimally. Return 0 for now. */
4546 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4548 if (! recog_memoized (dep_insn))
4550 switch (get_attr_type (dep_insn))
4557 case TYPE_FPSQRTSGL:
4558 case TYPE_FPSQRTDBL:
4559 /* A fpload can't be issued until one cycle before a
4560 preceding arithmetic operation has finished if
4561 the target of the fpload is the destination of the
4562 arithmetic operation.
4564 Exception: For PA7100LC, PA7200 and PA7300, the cost
4565 is 3 cycles, unless they bundle together. We also
4566 pay the penalty if the second insn is a fpload. */
4567 return insn_default_latency (dep_insn) - 1;
4574 else if (attr_type == TYPE_FPALU)
4576 rtx pat = PATTERN (insn);
4577 rtx dep_pat = PATTERN (dep_insn);
4578 if (GET_CODE (pat) == PARALLEL)
4580 /* This happens for the fldXs,mb patterns. */
4581 pat = XVECEXP (pat, 0, 0);
4583 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4584 /* If this happens, we have to extend this to schedule
4585 optimally. Return 0 for now. */
4588 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4590 if (! recog_memoized (dep_insn))
4592 switch (get_attr_type (dep_insn))
4596 case TYPE_FPSQRTSGL:
4597 case TYPE_FPSQRTDBL:
4598 /* An ALU flop can't be issued until two cycles before a
4599 preceding divide or sqrt operation has finished if
4600 the target of the ALU flop is also the target of
4601 the divide or sqrt operation. */
4602 return insn_default_latency (dep_insn) - 2;
4610 /* For other output dependencies, the cost is 0. */
4618 /* Adjust scheduling priorities. We use this to try to keep addil
4619 and the next use of %r1 close together. */
4621 pa_adjust_priority (rtx insn, int priority)
4623 rtx set = single_set (insn);
4627 src = SET_SRC (set);
4628 dest = SET_DEST (set);
4629 if (GET_CODE (src) == LO_SUM
4630 && symbolic_operand (XEXP (src, 1), VOIDmode)
4631 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4634 else if (GET_CODE (src) == MEM
4635 && GET_CODE (XEXP (src, 0)) == LO_SUM
4636 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4637 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4640 else if (GET_CODE (dest) == MEM
4641 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4642 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4643 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4649 /* The 700 can only issue a single insn at a time.
4650 The 7XXX processors can issue two insns at a time.
4651 The 8000 can issue 4 insns at a time. */
4653 pa_issue_rate (void)
4657 case PROCESSOR_700: return 1;
4658 case PROCESSOR_7100: return 2;
4659 case PROCESSOR_7100LC: return 2;
4660 case PROCESSOR_7200: return 2;
4661 case PROCESSOR_7300: return 2;
4662 case PROCESSOR_8000: return 4;
4671 /* Return any length adjustment needed by INSN which already has its length
4672 computed as LENGTH. Return zero if no adjustment is necessary.
4674 For the PA: function calls, millicode calls, and backwards short
4675 conditional branches with unfilled delay slots need an adjustment by +1
4676 (to account for the NOP which will be inserted into the instruction stream).
4678 Also compute the length of an inline block move here as it is too
4679 complicated to express as a length attribute in pa.md. */
4681 pa_adjust_insn_length (rtx insn, int length)
4683 rtx pat = PATTERN (insn);
4685 /* Jumps inside switch tables which have unfilled delay slots need
4687 if (GET_CODE (insn) == JUMP_INSN
4688 && GET_CODE (pat) == PARALLEL
4689 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4691 /* Millicode insn with an unfilled delay slot. */
4692 else if (GET_CODE (insn) == INSN
4693 && GET_CODE (pat) != SEQUENCE
4694 && GET_CODE (pat) != USE
4695 && GET_CODE (pat) != CLOBBER
4696 && get_attr_type (insn) == TYPE_MILLI)
4698 /* Block move pattern. */
4699 else if (GET_CODE (insn) == INSN
4700 && GET_CODE (pat) == PARALLEL
4701 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4702 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4703 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4704 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4705 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4706 return compute_movmem_length (insn) - 4;
4707 /* Block clear pattern. */
4708 else if (GET_CODE (insn) == INSN
4709 && GET_CODE (pat) == PARALLEL
4710 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4711 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4712 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4713 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4714 return compute_clrmem_length (insn) - 4;
4715 /* Conditional branch with an unfilled delay slot. */
4716 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4718 /* Adjust a short backwards conditional with an unfilled delay slot. */
4719 if (GET_CODE (pat) == SET
4721 && ! forward_branch_p (insn))
4723 else if (GET_CODE (pat) == PARALLEL
4724 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4727 /* Adjust dbra insn with short backwards conditional branch with
4728 unfilled delay slot -- only for case where counter is in a
4729 general register. */
4730 else if (GET_CODE (pat) == PARALLEL
4731 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4732 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4733 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4735 && ! forward_branch_p (insn))
4743 /* Print operand X (an rtx) in assembler syntax to file FILE.
4744 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4745 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4748 print_operand (FILE *file, rtx x, int code)
4753 /* Output a 'nop' if there's nothing for the delay slot. */
4754 if (dbr_sequence_length () == 0)
4755 fputs ("\n\tnop", file);
4758 /* Output a nullification completer if there's nothing for the
4759 delay slot or nullification is requested. */
4760 if (dbr_sequence_length () == 0 ||
4762 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4766 /* Print out the second register name of a register pair.
4767 I.e., R (6) => 7. */
4768 fputs (reg_names[REGNO (x) + 1], file);
4771 /* A register or zero. */
4773 || (x == CONST0_RTX (DFmode))
4774 || (x == CONST0_RTX (SFmode)))
4776 fputs ("%r0", file);
4782 /* A register or zero (floating point). */
4784 || (x == CONST0_RTX (DFmode))
4785 || (x == CONST0_RTX (SFmode)))
4787 fputs ("%fr0", file);
4796 xoperands[0] = XEXP (XEXP (x, 0), 0);
4797 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4798 output_global_address (file, xoperands[1], 0);
4799 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4803 case 'C': /* Plain (C)ondition */
4805 switch (GET_CODE (x))
4808 fputs ("=", file); break;
4810 fputs ("<>", file); break;
4812 fputs (">", file); break;
4814 fputs (">=", file); break;
4816 fputs (">>=", file); break;
4818 fputs (">>", file); break;
4820 fputs ("<", file); break;
4822 fputs ("<=", file); break;
4824 fputs ("<<=", file); break;
4826 fputs ("<<", file); break;
4831 case 'N': /* Condition, (N)egated */
4832 switch (GET_CODE (x))
4835 fputs ("<>", file); break;
4837 fputs ("=", file); break;
4839 fputs ("<=", file); break;
4841 fputs ("<", file); break;
4843 fputs ("<<", file); break;
4845 fputs ("<<=", file); break;
4847 fputs (">=", file); break;
4849 fputs (">", file); break;
4851 fputs (">>", file); break;
4853 fputs (">>=", file); break;
4858 /* For floating point comparisons. Note that the output
4859 predicates are the complement of the desired mode. The
4860 conditions for GT, GE, LT, LE and LTGT cause an invalid
4861 operation exception if the result is unordered and this
4862 exception is enabled in the floating-point status register. */
4864 switch (GET_CODE (x))
4867 fputs ("!=", file); break;
4869 fputs ("=", file); break;
4871 fputs ("!>", file); break;
4873 fputs ("!>=", file); break;
4875 fputs ("!<", file); break;
4877 fputs ("!<=", file); break;
4879 fputs ("!<>", file); break;
4881 fputs ("!?<=", file); break;
4883 fputs ("!?<", file); break;
4885 fputs ("!?>=", file); break;
4887 fputs ("!?>", file); break;
4889 fputs ("!?=", file); break;
4891 fputs ("!?", file); break;
4893 fputs ("?", file); break;
4898 case 'S': /* Condition, operands are (S)wapped. */
4899 switch (GET_CODE (x))
4902 fputs ("=", file); break;
4904 fputs ("<>", file); break;
4906 fputs ("<", file); break;
4908 fputs ("<=", file); break;
4910 fputs ("<<=", file); break;
4912 fputs ("<<", file); break;
4914 fputs (">", file); break;
4916 fputs (">=", file); break;
4918 fputs (">>=", file); break;
4920 fputs (">>", file); break;
4925 case 'B': /* Condition, (B)oth swapped and negate. */
4926 switch (GET_CODE (x))
4929 fputs ("<>", file); break;
4931 fputs ("=", file); break;
4933 fputs (">=", file); break;
4935 fputs (">", file); break;
4937 fputs (">>", file); break;
4939 fputs (">>=", file); break;
4941 fputs ("<=", file); break;
4943 fputs ("<", file); break;
4945 fputs ("<<", file); break;
4947 fputs ("<<=", file); break;
4953 gcc_assert (GET_CODE (x) == CONST_INT);
4954 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4957 gcc_assert (GET_CODE (x) == CONST_INT);
4958 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4961 gcc_assert (GET_CODE (x) == CONST_INT);
4962 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4965 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4966 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4969 gcc_assert (GET_CODE (x) == CONST_INT);
4970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4973 gcc_assert (GET_CODE (x) == CONST_INT);
4974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4977 if (GET_CODE (x) == CONST_INT)
4982 switch (GET_CODE (XEXP (x, 0)))
4986 if (ASSEMBLER_DIALECT == 0)
4987 fputs ("s,mb", file);
4989 fputs (",mb", file);
4993 if (ASSEMBLER_DIALECT == 0)
4994 fputs ("s,ma", file);
4996 fputs (",ma", file);
4999 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5000 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5002 if (ASSEMBLER_DIALECT == 0)
5005 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5006 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5008 if (ASSEMBLER_DIALECT == 0)
5009 fputs ("x,s", file);
5013 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5017 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5023 output_global_address (file, x, 0);
5026 output_global_address (file, x, 1);
5028 case 0: /* Don't do anything special */
5033 compute_zdepwi_operands (INTVAL (x), op);
5034 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5040 compute_zdepdi_operands (INTVAL (x), op);
5041 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5045 /* We can get here from a .vtable_inherit due to our
5046 CONSTANT_ADDRESS_P rejecting perfectly good constant
5052 if (GET_CODE (x) == REG)
5054 fputs (reg_names [REGNO (x)], file);
5055 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5061 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5062 && (REGNO (x) & 1) == 0)
5065 else if (GET_CODE (x) == MEM)
5067 int size = GET_MODE_SIZE (GET_MODE (x));
5068 rtx base = NULL_RTX;
5069 switch (GET_CODE (XEXP (x, 0)))
5073 base = XEXP (XEXP (x, 0), 0);
5074 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5078 base = XEXP (XEXP (x, 0), 0);
5079 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5082 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5083 fprintf (file, "%s(%s)",
5084 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5085 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5086 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5087 fprintf (file, "%s(%s)",
5088 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5089 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5090 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5091 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5093 /* Because the REG_POINTER flag can get lost during reload,
5094 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5095 index and base registers in the combined move patterns. */
5096 rtx base = XEXP (XEXP (x, 0), 1);
5097 rtx index = XEXP (XEXP (x, 0), 0);
5099 fprintf (file, "%s(%s)",
5100 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5103 output_address (XEXP (x, 0));
5106 output_address (XEXP (x, 0));
5111 output_addr_const (file, x);
5114 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5117 output_global_address (FILE *file, rtx x, int round_constant)
5120 /* Imagine (high (const (plus ...))). */
5121 if (GET_CODE (x) == HIGH)
5124 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5125 output_addr_const (file, x);
5126 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5128 output_addr_const (file, x);
5129 fputs ("-$global$", file);
5131 else if (GET_CODE (x) == CONST)
5133 const char *sep = "";
5134 int offset = 0; /* assembler wants -$global$ at end */
5135 rtx base = NULL_RTX;
5137 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5140 base = XEXP (XEXP (x, 0), 0);
5141 output_addr_const (file, base);
5144 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5150 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5153 base = XEXP (XEXP (x, 0), 1);
5154 output_addr_const (file, base);
5157 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5163 /* How bogus. The compiler is apparently responsible for
5164 rounding the constant if it uses an LR field selector.
5166 The linker and/or assembler seem a better place since
5167 they have to do this kind of thing already.
5169 If we fail to do this, HP's optimizing linker may eliminate
5170 an addil, but not update the ldw/stw/ldo instruction that
5171 uses the result of the addil. */
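/* For example, an offset of 0x1234 becomes (0x1234 + 0x1000) & ~0x1fff
   = 0x2000; the expression rounds the offset to the nearest multiple
   of 0x2000.  */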
5173 offset = ((offset + 0x1000) & ~0x1fff);
5175 switch (GET_CODE (XEXP (x, 0)))
5188 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5196 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5197 fputs ("-$global$", file);
5199 fprintf (file, "%s%d", sep, offset);
5202 output_addr_const (file, x);
5205 /* Output boilerplate text to appear at the beginning of the file.
5206 There are several possible versions. */
5207 #define aputs(x) fputs(x, asm_out_file)
5209 pa_file_start_level (void)
5212 aputs ("\t.LEVEL 2.0w\n");
5213 else if (TARGET_PA_20)
5214 aputs ("\t.LEVEL 2.0\n");
5215 else if (TARGET_PA_11)
5216 aputs ("\t.LEVEL 1.1\n");
5218 aputs ("\t.LEVEL 1.0\n");
5222 pa_file_start_space (int sortspace)
5224 aputs ("\t.SPACE $PRIVATE$");
5227 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5228 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5229 "\n\t.SPACE $TEXT$");
5232 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5233 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5237 pa_file_start_file (int want_version)
5239 if (write_symbols != NO_DEBUG)
5241 output_file_directive (asm_out_file, main_input_filename);
5243 aputs ("\t.version\t\"01.01\"\n");
5248 pa_file_start_mcount (const char *aswhat)
5251 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5255 pa_elf_file_start (void)
5257 pa_file_start_level ();
5258 pa_file_start_mcount ("ENTRY");
5259 pa_file_start_file (0);
5263 pa_som_file_start (void)
5265 pa_file_start_level ();
5266 pa_file_start_space (0);
5267 aputs ("\t.IMPORT $global$,DATA\n"
5268 "\t.IMPORT $$dyncall,MILLICODE\n");
5269 pa_file_start_mcount ("CODE");
5270 pa_file_start_file (0);
5274 pa_linux_file_start (void)
5276 pa_file_start_file (1);
5277 pa_file_start_level ();
5278 pa_file_start_mcount ("CODE");
5282 pa_hpux64_gas_file_start (void)
5284 pa_file_start_level ();
5285 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5287 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5289 pa_file_start_file (1);
5293 pa_hpux64_hpas_file_start (void)
5295 pa_file_start_level ();
5296 pa_file_start_space (1);
5297 pa_file_start_mcount ("CODE");
5298 pa_file_start_file (0);
5302 /* Search the deferred plabel list for SYMBOL and return its internal
5303 label. If an entry for SYMBOL is not found, a new entry is created. */
5306 get_deferred_plabel (rtx symbol)
5308 const char *fname = XSTR (symbol, 0);
5311 /* See if we have already put this function on the list of deferred
5312 plabels. This list is generally small, so a linear search is not
5313 too ugly. If it proves too slow, replace it with something faster. */
5314 for (i = 0; i < n_deferred_plabels; i++)
5315 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5318 /* If the deferred plabel list is empty, or this entry was not found
5319 on the list, create a new entry on the list. */
5320 if (deferred_plabels == NULL || i == n_deferred_plabels)
5324 if (deferred_plabels == 0)
5325 deferred_plabels = (struct deferred_plabel *)
5326 ggc_alloc (sizeof (struct deferred_plabel));
5328 deferred_plabels = (struct deferred_plabel *)
5329 ggc_realloc (deferred_plabels,
5330 ((n_deferred_plabels + 1)
5331 * sizeof (struct deferred_plabel)));
5333 i = n_deferred_plabels++;
5334 deferred_plabels[i].internal_label = gen_label_rtx ();
5335 deferred_plabels[i].symbol = symbol;
5337 /* Gross. We have just implicitly taken the address of this
5338 function. Mark it in the same manner as assemble_name. */
5339 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5341 mark_referenced (id);
5344 return deferred_plabels[i].internal_label;
5348 output_deferred_plabels (void)
5352 /* If we have some deferred plabels, then we need to switch into the
5353 data or readonly data section, and align it to a 4 byte boundary
5354 before outputting the deferred plabels. */
5355 if (n_deferred_plabels)
5357 switch_to_section (flag_pic ? data_section : readonly_data_section);
5358 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5361 /* Now output the deferred plabels. */
5362 for (i = 0; i < n_deferred_plabels; i++)
5364 targetm.asm_out.internal_label (asm_out_file, "L",
5365 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5366 assemble_integer (deferred_plabels[i].symbol,
5367 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5371 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5372 /* Initialize optabs to point to HPUX long double emulation routines. */
5374 pa_hpux_init_libfuncs (void)
5376 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5377 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5378 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5379 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5380 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5381 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5382 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5383 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5384 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5386 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5387 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5388 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5389 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5390 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5391 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5392 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5394 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5395 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5396 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5397 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5399 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5400 ? "__U_Qfcnvfxt_quad_to_sgl"
5401 : "_U_Qfcnvfxt_quad_to_sgl");
5402 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5403 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5404 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5406 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5407 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5408 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5409 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5413 /* HP's millicode routines mean something special to the assembler.
5414 Keep track of which ones we have used. */
5416 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5417 static void import_milli (enum millicodes);
5418 static char imported[(int) end1000];
5419 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5420 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5421 #define MILLI_START 10
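/* MILLI_START is the index of the "...." placeholder in import_string;
   import_milli overwrites those four characters with a routine name,
   yielding, e.g., ".IMPORT $$mulI,MILLICODE".  */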
5424 import_milli (enum millicodes code)
5426 char str[sizeof (import_string)];
5428 if (!imported[(int) code])
5430 imported[(int) code] = 1;
5431 strcpy (str, import_string);
5432 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5433 output_asm_insn (str, 0);
5437 /* The register constraints have put the operands and return value in
5438 the proper registers. */
5441 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5443 import_milli (mulI);
5444 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5447 /* Emit the rtl for doing a division by a constant. */
5449 /* Do magic division millicodes exist for this value? */
5450 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
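/* For example, magic_milli[10] is 1, so a division by 10 can be emitted
   as a call to the $$divI_10 (or $$divU_10) millicode routine, while
   magic_milli[11] is 0 and a division by 11 falls back to the generic
   $$divI/$$divU routines.  */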
5452 /* We'll use an array to keep track of the magic millicodes and
5453 whether or not we've used them already. [n][0] is signed, [n][1] is
5456 static int div_milli[16][2];
5459 emit_hpdiv_const (rtx *operands, int unsignedp)
5461 if (GET_CODE (operands[2]) == CONST_INT
5462 && INTVAL (operands[2]) > 0
5463 && INTVAL (operands[2]) < 16
5464 && magic_milli[INTVAL (operands[2])])
5466 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5468 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5472 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5473 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5475 gen_rtx_REG (SImode, 26),
5477 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5478 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5479 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5480 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5481 gen_rtx_CLOBBER (VOIDmode, ret))));
5482 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5489 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5493 /* If the divisor is a constant, try to use one of the special
5495 if (GET_CODE (operands[0]) == CONST_INT)
5497 static char buf[100];
5498 divisor = INTVAL (operands[0]);
5499 if (!div_milli[divisor][unsignedp])
5501 div_milli[divisor][unsignedp] = 1;
5503 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5505 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5509 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5510 INTVAL (operands[0]));
5511 return output_millicode_call (insn,
5512 gen_rtx_SYMBOL_REF (SImode, buf));
5516 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5517 INTVAL (operands[0]));
5518 return output_millicode_call (insn,
5519 gen_rtx_SYMBOL_REF (SImode, buf));
5522 /* Divisor isn't a special constant. */
5527 import_milli (divU);
5528 return output_millicode_call (insn,
5529 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5533 import_milli (divI);
5534 return output_millicode_call (insn,
5535 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5540 /* Output a $$rem millicode to do mod. */
5543 output_mod_insn (int unsignedp, rtx insn)
5547 import_milli (remU);
5548 return output_millicode_call (insn,
5549 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5553 import_milli (remI);
5554 return output_millicode_call (insn,
5555 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5560 output_arg_descriptor (rtx call_insn)
5562 const char *arg_regs[4];
5563 enum machine_mode arg_mode;
5565 int i, output_flag = 0;
5568 /* We neither need nor want argument location descriptors for the
5569 64-bit runtime environment or the ELF32 environment. */
5570 if (TARGET_64BIT || TARGET_ELF32)
5573 for (i = 0; i < 4; i++)
5576 /* Specify explicitly that no argument relocations should take place
5577 if using the portable runtime calling conventions. */
5578 if (TARGET_PORTABLE_RUNTIME)
5580 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5585 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5586 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5587 link; link = XEXP (link, 1))
5589 rtx use = XEXP (link, 0);
5591 if (! (GET_CODE (use) == USE
5592 && GET_CODE (XEXP (use, 0)) == REG
5593 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5596 arg_mode = GET_MODE (XEXP (use, 0));
5597 regno = REGNO (XEXP (use, 0));
5598 if (regno >= 23 && regno <= 26)
5600 arg_regs[26 - regno] = "GR";
5601 if (arg_mode == DImode)
5602 arg_regs[25 - regno] = "GR";
5604 else if (regno >= 32 && regno <= 39)
5606 if (arg_mode == SFmode)
5607 arg_regs[(regno - 32) / 2] = "FR";
5610 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5611 arg_regs[(regno - 34) / 2] = "FR";
5612 arg_regs[(regno - 34) / 2 + 1] = "FU";
5614 arg_regs[(regno - 34) / 2] = "FU";
5615 arg_regs[(regno - 34) / 2 + 1] = "FR";
5620 fputs ("\t.CALL ", asm_out_file);
5621 for (i = 0; i < 4; i++)
5626 fputc (',', asm_out_file);
5627 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5630 fputc ('\n', asm_out_file);
5633 static enum reg_class
5634 pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
5635 enum machine_mode mode, secondary_reload_info *sri)
5637 int is_symbolic, regno;
5639 /* Handle the easy stuff first. */
5640 if (class == R1_REGS)
5646 if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5652 /* If we have something like (mem (mem (...))), we can safely assume the
5653 inner MEM will end up in a general register after reloading, so there's
5654 no need for a secondary reload. */
5655 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5658 /* Trying to load a constant into a FP register during PIC code
5659 generation requires %r1 as a scratch register. */
5661 && (mode == SImode || mode == DImode)
5662 && FP_REG_CLASS_P (class)
5663 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5665 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5666 : CODE_FOR_reload_indi_r1);
5670 /* Profiling showed the PA port spends about 1.3% of its compilation
5671 time in true_regnum from calls inside pa_secondary_reload_class. */
5672 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5673 regno = true_regnum (x);
5675 /* Handle out of range displacement for integer mode loads/stores of
5677 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5678 && GET_MODE_CLASS (mode) == MODE_INT
5679 && FP_REG_CLASS_P (class))
5680 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5682 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5686 /* A SAR<->FP register copy requires a secondary register (GPR) as
5687 well as secondary memory. */
5688 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5689 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5690 || (class == SHIFT_REGS
5691 && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5693 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5697 /* Secondary reloads of symbolic operands require %r1 as a scratch
5698 register when we're generating PIC code and the operand isn't
5700 if (GET_CODE (x) == HIGH)
5703 /* Profiling has shown that GCC spends about 2.6% of its compilation
5704 time in symbolic_operand from calls inside pa_secondary_reload_class.
5705 So, we use an inline copy to avoid useless work. */
5706 switch (GET_CODE (x))
5711 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5718 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5719 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5720 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5721 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5728 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5730 gcc_assert (mode == SImode || mode == DImode);
5731 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5732 : CODE_FOR_reload_indi_r1);
5738 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5739 by invisible reference. As a GCC extension, we also pass anything
5740 with a zero or variable size by reference.
5742 The 64-bit runtime does not describe passing any types by invisible
5743 reference. The internals of GCC can't currently handle passing
5744 empty structures, and zero or variable length arrays when they are
5745 not passed entirely on the stack or by reference. Thus, as a GCC
5746 extension, we pass these types by reference. The HP compiler doesn't
5747 support these types, so hopefully there shouldn't be any compatibility
5748 issues. This may have to be revisited when HP releases a C99 compiler
5749 or updates the ABI. */
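/* For instance, under this rule a 16-byte struct or a zero-sized struct
   is passed by reference, while an 8-byte double is passed by value.  */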
5752 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5753 enum machine_mode mode, tree type,
5754 bool named ATTRIBUTE_UNUSED)
5759 size = int_size_in_bytes (type);
5761 size = GET_MODE_SIZE (mode);
5766 return size <= 0 || size > 8;
5770 function_arg_padding (enum machine_mode mode, tree type)
5773 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5775 /* Return none if justification is not required. */
5777 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5778 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5781 /* The directions set here are ignored when a BLKmode argument larger
5782 than a word is placed in a register. Different code is used for
5783 the stack and registers. This makes it difficult to have a
5784 consistent data representation for both the stack and registers.
5785 For both runtimes, the justification and padding for arguments on
5786 the stack and in registers should be identical. */
5788 /* The 64-bit runtime specifies left justification for aggregates. */
5791 /* The 32-bit runtime architecture specifies right justification.
5792 When the argument is passed on the stack, the argument is padded
5793 with garbage on the left. The HP compiler pads with zeros. */
5797 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5804 /* Do what is necessary for `va_start'. We look at the current function
5805 to determine if stdargs or varargs is used and fill in an initial
5806 va_list. A pointer to this constructor is returned. */
5809 hppa_builtin_saveregs (void)
5812 tree fntype = TREE_TYPE (current_function_decl);
5813 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5814 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5815 != void_type_node)))
5816 ? UNITS_PER_WORD : 0);
5819 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5821 offset = current_function_arg_offset_rtx;
5827 /* Adjust for varargs/stdarg differences. */
5829 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5831 offset = current_function_arg_offset_rtx;
5833 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5834 from the incoming arg pointer and growing to larger addresses. */
5835 for (i = 26, off = -64; i >= 19; i--, off += 8)
5836 emit_move_insn (gen_rtx_MEM (word_mode,
5837 plus_constant (arg_pointer_rtx, off)),
5838 gen_rtx_REG (word_mode, i));
5840 /* The incoming args pointer points just beyond the flushback area;
5841 normally this is not a serious concern. However, when we are doing
5842 varargs/stdargs we want to make the arg pointer point to the start
5843 of the incoming argument area. */
5844 emit_move_insn (virtual_incoming_args_rtx,
5845 plus_constant (arg_pointer_rtx, -64));
5847 /* Now return a pointer to the first anonymous argument. */
5848 return copy_to_reg (expand_binop (Pmode, add_optab,
5849 virtual_incoming_args_rtx,
5850 offset, 0, 0, OPTAB_LIB_WIDEN));
5853 /* Store general registers on the stack. */
5854 dest = gen_rtx_MEM (BLKmode,
5855 plus_constant (current_function_internal_arg_pointer,
5857 set_mem_alias_set (dest, get_varargs_alias_set ());
5858 set_mem_align (dest, BITS_PER_WORD);
5859 move_block_from_reg (23, dest, 4);
5861 /* move_block_from_reg will emit code to store the argument registers
5862 individually as scalar stores.
5864 However, other insns may later load from the same addresses for
5865 a structure load (passing a struct to a varargs routine).
5867 The alias code assumes that such aliasing can never happen, so we
5868 have to keep memory referencing insns from moving up beyond the
5869 last argument register store. So we emit a blockage insn here. */
5870 emit_insn (gen_blockage ());
5872 return copy_to_reg (expand_binop (Pmode, add_optab,
5873 current_function_internal_arg_pointer,
5874 offset, 0, 0, OPTAB_LIB_WIDEN));
5878 hppa_va_start (tree valist, rtx nextarg)
5880 nextarg = expand_builtin_saveregs ();
5881 std_expand_builtin_va_start (valist, nextarg);
5885 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5889 /* Args grow upward. We can use the generic routines. */
5890 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5892 else /* !TARGET_64BIT */
5894 tree ptr = build_pointer_type (type);
5897 unsigned int size, ofs;
5900 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5904 ptr = build_pointer_type (type);
5906 size = int_size_in_bytes (type);
5907 valist_type = TREE_TYPE (valist);
5909 /* Args grow down. Not handled by generic routines. */
5911 u = fold_convert (sizetype, size_in_bytes (type));
5912 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5913 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
5915 /* Copied from va-pa.h, but we probably don't need to align to
5916 word size, since we generate and preserve that invariant. */
5917 u = size_int (size > 4 ? -8 : -4);
5918 t = fold_convert (sizetype, t);
5919 t = build2 (BIT_AND_EXPR, sizetype, t, u);
5920 t = fold_convert (valist_type, t);
5922 t = build2 (MODIFY_EXPR, valist_type, valist, t);
5924 ofs = (8 - size) % 4;
5928 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
5931 t = fold_convert (ptr, t);
5932 t = build_va_arg_indirect_ref (t);
5935 t = build_va_arg_indirect_ref (t);
5941 /* True if MODE is valid for the target. By "valid", we mean able to
5942 be manipulated in non-trivial ways. In particular, this means all
5943 the arithmetic is supported.
5945 Currently, TImode is not valid as the HP 64-bit runtime documentation
5946 doesn't document the alignment and calling conventions for this type.
5947 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5948 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
5951 pa_scalar_mode_supported_p (enum machine_mode mode)
5953 int precision = GET_MODE_PRECISION (mode);
5955 switch (GET_MODE_CLASS (mode))
5957 case MODE_PARTIAL_INT:
5959 if (precision == CHAR_TYPE_SIZE)
5961 if (precision == SHORT_TYPE_SIZE)
5963 if (precision == INT_TYPE_SIZE)
5965 if (precision == LONG_TYPE_SIZE)
5967 if (precision == LONG_LONG_TYPE_SIZE)
5972 if (precision == FLOAT_TYPE_SIZE)
5974 if (precision == DOUBLE_TYPE_SIZE)
5976 if (precision == LONG_DOUBLE_TYPE_SIZE)
5980 case MODE_DECIMAL_FLOAT:
5988 /* This routine handles all the normal conditional branch sequences we
5989 might need to generate. It handles compare immediate vs compare
5990 register, nullification of delay slots, varying length branches,
5991 negated branches, and all combinations of the above. It returns the
5992 output appropriate to emit the branch corresponding to all given
5996 output_cbranch (rtx *operands, int negated, rtx insn)
5998 static char buf[100];
6000 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6001 int length = get_attr_length (insn);
6004 /* A conditional branch to the following instruction (e.g. the delay slot)
6005 is asking for a disaster. This can happen when not optimizing and
6006 when jump optimization fails.
6008 While it is usually safe to emit nothing, this can fail if the
6009 preceding instruction is a nullified branch with an empty delay
6010 slot and the same branch target as this branch. We could check
6011 for this but jump optimization should eliminate nop jumps. It
6012 is always safe to emit a nop. */
6013 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6016 /* The doubleword form of the cmpib instruction doesn't have the LEU
6017 and GTU conditions while the cmpb instruction does. Since we accept
6018 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6019 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6020 operands[2] = gen_rtx_REG (DImode, 0);
6021 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6022 operands[1] = gen_rtx_REG (DImode, 0);
6024 /* If this is a long branch with its delay slot unfilled, set `nullify'
6025 as it can nullify the delay slot and save a nop. */
6026 if (length == 8 && dbr_sequence_length () == 0)
6029 /* If this is a short forward conditional branch which did not get
6030 its delay slot filled, the delay slot can still be nullified. */
6031 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6032 nullify = forward_branch_p (insn);
6034 /* A forward branch over a single nullified insn can be done with a
6035 comclr instruction. This avoids a single cycle penalty due to a
6036 mis-predicted branch if we fall through (branch not taken). */
6038 && next_real_insn (insn) != 0
6039 && get_attr_length (next_real_insn (insn)) == 4
6040 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6046 /* All short conditional branches except backwards with an unfilled
6050 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6052 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6053 if (GET_MODE (operands[1]) == DImode)
6056 strcat (buf, "%B3");
6058 strcat (buf, "%S3");
6060 strcat (buf, " %2,%r1,%%r0");
6062 strcat (buf, ",n %2,%r1,%0");
6064 strcat (buf, " %2,%r1,%0");
6067 /* All long conditionals. Note a short backward branch with an
6068 unfilled delay slot is treated just like a long backward branch
6069 with an unfilled delay slot. */
6071 /* Handle weird backwards branch with a filled delay slot
6072 which is nullified. */
6073 if (dbr_sequence_length () != 0
6074 && ! forward_branch_p (insn)
6077 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6078 if (GET_MODE (operands[1]) == DImode)
6081 strcat (buf, "%S3");
6083 strcat (buf, "%B3");
6084 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6086 /* Handle short backwards branch with an unfilled delay slot.
6087 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6088 taken and untaken branches. */
6089 else if (dbr_sequence_length () == 0
6090 && ! forward_branch_p (insn)
6091 && INSN_ADDRESSES_SET_P ()
6092 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6093 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6095 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6096 if (GET_MODE (operands[1]) == DImode)
6099 strcat (buf, "%B3 %2,%r1,%0%#");
6101 strcat (buf, "%S3 %2,%r1,%0%#");
6105 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6106 if (GET_MODE (operands[1]) == DImode)
6109 strcat (buf, "%S3");
6111 strcat (buf, "%B3");
6113 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6115 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6120 /* The reversed conditional branch must branch over one additional
6121 instruction if the delay slot is filled and needs to be extracted
6122 by output_lbranch. If the delay slot is empty or this is a
6123 nullified forward branch, the instruction after the reversed
6124 conditional branch must be nullified. */
6125 if (dbr_sequence_length () == 0
6126 || (nullify && forward_branch_p (insn)))
6130 operands[4] = GEN_INT (length);
6135 operands[4] = GEN_INT (length + 4);
6138 /* Create a reversed conditional branch which branches around
6139 the following insns. */
6140 if (GET_MODE (operands[1]) != DImode)
6146 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6149 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6155 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6158 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6167 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6170 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6176 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6179 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6183 output_asm_insn (buf, operands);
6184 return output_lbranch (operands[0], insn, xdelay);
6189 /* This routine handles output of long unconditional branches that
6190 exceed the maximum range of a simple branch instruction. Since
6191 we don't have a register available for the branch, we save register
6192 %r1 in the frame marker, load the branch destination DEST into %r1,
6193 execute the branch, and restore %r1 in the delay slot of the branch.
6195 Since long branches may have an insn in the delay slot and the
6196 delay slot is used to restore %r1, we in general need to extract
6197 this insn and execute it before the branch. However, to facilitate
6198 use of this function by conditional branches, we also provide an
6199 option to not extract the delay insn so that it will be emitted
6200 after the long branch. So, if there is an insn in the delay slot,
6201 it is extracted if XDELAY is nonzero.
6203 The lengths of the various long-branch sequences are 20, 16 and 24
6204 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
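/* A rough breakdown of the sequences (each PA instruction is 4 bytes):
     portable runtime: save %r1, ldil, ldo, bv, restore %r1        (20)
     non-PIC:          save %r1, ldil, be, restore %r1             (16)
     PIC:              save %r1, b,l, addil, ldo, bv, restore %r1  (24)  */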
6207 output_lbranch (rtx dest, rtx insn, int xdelay)
6211 xoperands[0] = dest;
6213 /* First, free up the delay slot. */
6214 if (xdelay && dbr_sequence_length () != 0)
6216 /* We can't handle a jump in the delay slot. */
6217 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6219 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6222 /* Now delete the delay insn. */
6223 SET_INSN_DELETED (NEXT_INSN (insn));
6226 /* Output an insn to save %r1. The runtime documentation doesn't
6227 specify whether the "Clean Up" slot in the callers frame can
6228 be clobbered by the callee. It isn't copied by HP's builtin
6229 alloca, so this suggests that it can be clobbered if necessary.
6230 The "Static Link" location is copied by HP builtin alloca, so
6231 we avoid using it. Using the cleanup slot might be a problem
6232 if we have to interoperate with languages that pass cleanup
6233 information. However, it should be possible to handle these
6234 situations with GCC's asm feature.
6236 The "Current RP" slot is reserved for the called procedure, so
6237 we try to use it when we don't have a frame of our own. It's
6238 rather unlikely that we won't have a frame when we need to emit
6241 Really the way to go long term is a register scavenger; go to
6242 the target of the jump and find a register which we can use
6243 as a scratch to hold the value in %r1. Then, we wouldn't have
6244 to free up the delay slot or clobber a slot that may be needed
6245 for other purposes. */
6248 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6249 /* Use the return pointer slot in the frame marker. */
6250 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6252 /* Use the slot at -40 in the frame marker since HP builtin
6253 alloca doesn't copy it. */
6254 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6258 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6259 /* Use the return pointer slot in the frame marker. */
6260 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6262 /* Use the "Clean Up" slot in the frame marker. In GCC,
6263 the only other use of this location is for copying a
6264 floating point double argument from a floating-point
6265 register to two general registers. The copy is done
6266 as an "atomic" operation when outputting a call, so it
6267 won't interfere with our using the location here. */
6268 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6271 if (TARGET_PORTABLE_RUNTIME)
6273 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6274 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6275 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6279 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6280 if (TARGET_SOM || !TARGET_GAS)
6282 xoperands[1] = gen_label_rtx ();
6283 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6284 targetm.asm_out.internal_label (asm_out_file, "L",
6285 CODE_LABEL_NUMBER (xoperands[1]));
6286 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6290 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6291 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6293 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6296 /* Now output a very long branch to the original target. */
6297 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6299 /* Now restore the value of %r1 in the delay slot. */
6302 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6303 return "ldd -16(%%r30),%%r1";
6305 return "ldd -40(%%r30),%%r1";
6309 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6310 return "ldw -20(%%r30),%%r1";
6312 return "ldw -12(%%r30),%%r1";
6316 /* This routine handles all the branch-on-bit conditional branch sequences we
6317 might need to generate. It handles nullification of delay slots,
6318 varying length branches, negated branches and all combinations of the
6319 above. It returns the appropriate output template to emit the branch. */
6322 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6324 static char buf[100];
6326 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6327 int length = get_attr_length (insn);
6330 /* A conditional branch to the following instruction (e.g. the delay slot) is
6331 asking for a disaster. I do not think this can happen as this pattern
6332 is only used when optimizing; jump optimization should eliminate the
6333 jump. But be prepared just in case. */
6335 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6338 /* If this is a long branch with its delay slot unfilled, set `nullify'
6339 as it can nullify the delay slot and save a nop. */
6340 if (length == 8 && dbr_sequence_length () == 0)
6343 /* If this is a short forward conditional branch which did not get
6344 its delay slot filled, the delay slot can still be nullified. */
6345 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6346 nullify = forward_branch_p (insn);
6348 /* A forward branch over a single nullified insn can be done with an
6349 extrs instruction. This avoids a single cycle penalty due to a
6350 mis-predicted branch if we fall through (branch not taken). */
6353 && next_real_insn (insn) != 0
6354 && get_attr_length (next_real_insn (insn)) == 4
6355 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6362 /* All short conditional branches except backwards with an unfilled
6366 strcpy (buf, "{extrs,|extrw,s,}");
6368 strcpy (buf, "bb,");
6369 if (useskip && GET_MODE (operands[0]) == DImode)
6370 strcpy (buf, "extrd,s,*");
6371 else if (GET_MODE (operands[0]) == DImode)
6372 strcpy (buf, "bb,*");
6373 if ((which == 0 && negated)
6374 || (which == 1 && ! negated))
6379 strcat (buf, " %0,%1,1,%%r0");
6380 else if (nullify && negated)
6381 strcat (buf, ",n %0,%1,%3");
6382 else if (nullify && ! negated)
6383 strcat (buf, ",n %0,%1,%2");
6384 else if (! nullify && negated)
6385 strcat (buf, "%0,%1,%3");
6386 else if (! nullify && ! negated)
6387 strcat (buf, " %0,%1,%2");
6390 /* All long conditionals. Note a short backward branch with an
6391 unfilled delay slot is treated just like a long backward branch
6392 with an unfilled delay slot. */
6394 /* Handle weird backwards branch with a filled delay slot
6395 which is nullified. */
6396 if (dbr_sequence_length () != 0
6397 && ! forward_branch_p (insn)
6400 strcpy (buf, "bb,");
6401 if (GET_MODE (operands[0]) == DImode)
6403 if ((which == 0 && negated)
6404 || (which == 1 && ! negated))
6409 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6411 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6413 /* Handle short backwards branch with an unfilled delay slot.
6414 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6415 taken and untaken branches. */
6416 else if (dbr_sequence_length () == 0
6417 && ! forward_branch_p (insn)
6418 && INSN_ADDRESSES_SET_P ()
6419 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6420 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6422 strcpy (buf, "bb,");
6423 if (GET_MODE (operands[0]) == DImode)
6425 if ((which == 0 && negated)
6426 || (which == 1 && ! negated))
6431 strcat (buf, " %0,%1,%3%#");
6433 strcat (buf, " %0,%1,%2%#");
6437 if (GET_MODE (operands[0]) == DImode)
6438 strcpy (buf, "extrd,s,*");
6440 strcpy (buf, "{extrs,|extrw,s,}");
6441 if ((which == 0 && negated)
6442 || (which == 1 && ! negated))
6446 if (nullify && negated)
6447 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6448 else if (nullify && ! negated)
6449 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6451 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6453 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6458 /* The reversed conditional branch must branch over one additional
6459 instruction if the delay slot is filled and needs to be extracted
6460 by output_lbranch. If the delay slot is empty or this is a
6461 nullified forward branch, the instruction after the reversed
6462 condition branch must be nullified. */
6463 if (dbr_sequence_length () == 0
6464 || (nullify && forward_branch_p (insn)))
6468 operands[4] = GEN_INT (length);
6473 operands[4] = GEN_INT (length + 4);
6476 if (GET_MODE (operands[0]) == DImode)
6477 strcpy (buf, "bb,*");
6479 strcpy (buf, "bb,");
6480 if ((which == 0 && negated)
6481 || (which == 1 && !negated))
6486 strcat (buf, ",n %0,%1,.+%4");
6488 strcat (buf, " %0,%1,.+%4");
6489 output_asm_insn (buf, operands);
6490 return output_lbranch (negated ? operands[3] : operands[2],
6496 /* This routine handles all the branch-on-variable-bit conditional branch
6497 sequences we might need to generate. It handles nullification of delay
6498 slots, varying length branches, negated branches and all combinations
6499 of the above. It returns the appropriate output template to emit the branch. */
6503 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6505 static char buf[100];
6507 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6508 int length = get_attr_length (insn);
6511 /* A conditional branch to the following instruction (e.g. the delay slot) is
6512 asking for a disaster. I do not think this can happen as this pattern
6513 is only used when optimizing; jump optimization should eliminate the
6514 jump. But be prepared just in case. */
6516 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6519 /* If this is a long branch with its delay slot unfilled, set `nullify'
6520 as it can nullify the delay slot and save a nop. */
6521 if (length == 8 && dbr_sequence_length () == 0)
6524 /* If this is a short forward conditional branch which did not get
6525 its delay slot filled, the delay slot can still be nullified. */
6526 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6527 nullify = forward_branch_p (insn);
6529 /* A forward branch over a single nullified insn can be done with an
6530 extrs instruction. This avoids a single cycle penalty due to a
6531 mis-predicted branch if we fall through (branch not taken). */
6534 && next_real_insn (insn) != 0
6535 && get_attr_length (next_real_insn (insn)) == 4
6536 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6543 /* All short conditional branches except backwards with an unfilled delay slot. */
6547 strcpy (buf, "{vextrs,|extrw,s,}");
6549 strcpy (buf, "{bvb,|bb,}");
6550 if (useskip && GET_MODE (operands[0]) == DImode)
6551 strcpy (buf, "extrd,s,*");
6552 else if (GET_MODE (operands[0]) == DImode)
6553 strcpy (buf, "bb,*");
6554 if ((which == 0 && negated)
6555 || (which == 1 && ! negated))
6560 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6561 else if (nullify && negated)
6562 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6563 else if (nullify && ! negated)
6564 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6565 else if (! nullify && negated)
6566 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6567 else if (! nullify && ! negated)
6568 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6571 /* All long conditionals. Note a short backward branch with an
6572 unfilled delay slot is treated just like a long backward branch
6573 with an unfilled delay slot. */
6575 /* Handle weird backwards branch with a filled delay slot
6576 which is nullified. */
6577 if (dbr_sequence_length () != 0
6578 && ! forward_branch_p (insn)
6581 strcpy (buf, "{bvb,|bb,}");
6582 if (GET_MODE (operands[0]) == DImode)
6584 if ((which == 0 && negated)
6585 || (which == 1 && ! negated))
6590 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6592 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6594 /* Handle short backwards branch with an unfilled delay slot.
6595 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6596 taken and untaken branches. */
6597 else if (dbr_sequence_length () == 0
6598 && ! forward_branch_p (insn)
6599 && INSN_ADDRESSES_SET_P ()
6600 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6601 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6603 strcpy (buf, "{bvb,|bb,}");
6604 if (GET_MODE (operands[0]) == DImode)
6606 if ((which == 0 && negated)
6607 || (which == 1 && ! negated))
6612 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6614 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6618 strcpy (buf, "{vextrs,|extrw,s,}");
6619 if (GET_MODE (operands[0]) == DImode)
6620 strcpy (buf, "extrd,s,*");
6621 if ((which == 0 && negated)
6622 || (which == 1 && ! negated))
6626 if (nullify && negated)
6627 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6628 else if (nullify && ! negated)
6629 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6631 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6633 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6638 /* The reversed conditional branch must branch over one additional
6639 instruction if the delay slot is filled and needs to be extracted
6640 by output_lbranch. If the delay slot is empty or this is a
6641 nullified forward branch, the instruction after the reversed
6642 condition branch must be nullified. */
6643 if (dbr_sequence_length () == 0
6644 || (nullify && forward_branch_p (insn)))
6648 operands[4] = GEN_INT (length);
6653 operands[4] = GEN_INT (length + 4);
6656 if (GET_MODE (operands[0]) == DImode)
6657 strcpy (buf, "bb,*");
6659 strcpy (buf, "{bvb,|bb,}");
6660 if ((which == 0 && negated)
6661 || (which == 1 && !negated))
6666 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6668 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6669 output_asm_insn (buf, operands);
6670 return output_lbranch (negated ? operands[3] : operands[2],
6676 /* Return the output template for emitting a dbra type insn.
6678 Note it may perform some output operations on its own before
6679 returning the final output string. */
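/* For the common in-range alternative the template returned below is a
   single add-and-branch, roughly (illustrative only):

	addib,cond n,%rX,target		; %rX += n, then branch on cond

   The longer forms fall back to a separate addi/add followed by an
   explicit branch, or to output_lbranch when the target is out of reach
   or the loop counter lives in an FP register or in memory.  */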
6681 output_dbra (rtx *operands, rtx insn, int which_alternative)
6683 int length = get_attr_length (insn);
6685 /* A conditional branch to the following instruction (e.g. the delay slot) is
6686 asking for a disaster. Be prepared! */
6688 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6690 if (which_alternative == 0)
6691 return "ldo %1(%0),%0";
6692 else if (which_alternative == 1)
6694 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6695 output_asm_insn ("ldw -16(%%r30),%4", operands);
6696 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6697 return "{fldws|fldw} -16(%%r30),%0";
6701 output_asm_insn ("ldw %0,%4", operands);
6702 return "ldo %1(%4),%4\n\tstw %4,%0";
6706 if (which_alternative == 0)
6708 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6711 /* If this is a long branch with its delay slot unfilled, set `nullify'
6712 as it can nullify the delay slot and save a nop. */
6713 if (length == 8 && dbr_sequence_length () == 0)
6716 /* If this is a short forward conditional branch which did not get
6717 its delay slot filled, the delay slot can still be nullified. */
6718 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6719 nullify = forward_branch_p (insn);
6725 return "addib,%C2,n %1,%0,%3";
6727 return "addib,%C2 %1,%0,%3";
6730 /* Handle weird backwards branch with a filled delay slot
6731 which is nullified. */
6732 if (dbr_sequence_length () != 0
6733 && ! forward_branch_p (insn)
6735 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6736 /* Handle short backwards branch with an unfilled delay slot.
6737 Using an addb;nop rather than addi;bl saves 1 cycle for both
6738 taken and untaken branches. */
6739 else if (dbr_sequence_length () == 0
6740 && ! forward_branch_p (insn)
6741 && INSN_ADDRESSES_SET_P ()
6742 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6743 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6744 return "addib,%C2 %1,%0,%3%#";
6746 /* Handle normal cases. */
6748 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6750 return "addi,%N2 %1,%0,%0\n\tb %3";
6753 /* The reversed conditional branch must branch over one additional
6754 instruction if the delay slot is filled and needs to be extracted
6755 by output_lbranch. If the delay slot is empty or this is a
6756 nullified forward branch, the instruction after the reversed
6757 condition branch must be nullified. */
6758 if (dbr_sequence_length () == 0
6759 || (nullify && forward_branch_p (insn)))
6763 operands[4] = GEN_INT (length);
6768 operands[4] = GEN_INT (length + 4);
6772 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6774 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6776 return output_lbranch (operands[3], insn, xdelay);
6780 /* Deal with gross reload from FP register case. */
6781 else if (which_alternative == 1)
6783 /* Move loop counter from FP register to MEM then into a GR,
6784 increment the GR, store the GR into MEM, and finally reload
6785 the FP register from MEM from within the branch's delay slot. */
6786 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6788 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6790 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6791 else if (length == 28)
6792 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6795 operands[5] = GEN_INT (length - 16);
6796 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6797 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6798 return output_lbranch (operands[3], insn, 0);
6801 /* Deal with gross reload from memory case. */
6804 /* Reload loop counter from memory, the store back to memory
6805 happens in the branch's delay slot. */
6806 output_asm_insn ("ldw %0,%4", operands);
6808 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6809 else if (length == 16)
6810 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6813 operands[5] = GEN_INT (length - 4);
6814 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6815 return output_lbranch (operands[3], insn, 0);
6820 /* Return the output template for emitting a movb type insn.
6822 Note it may perform some output operations on its own before
6823 returning the final output string. */
6825 output_movb (rtx *operands, rtx insn, int which_alternative,
6826 int reverse_comparison)
6828 int length = get_attr_length (insn);
6830 /* A conditional branch to the following instruction (e.g. the delay slot) is
6831 asking for a disaster. Be prepared! */
6833 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6835 if (which_alternative == 0)
6836 return "copy %1,%0";
6837 else if (which_alternative == 1)
6839 output_asm_insn ("stw %1,-16(%%r30)", operands);
6840 return "{fldws|fldw} -16(%%r30),%0";
6842 else if (which_alternative == 2)
6848 /* Support the second variant. */
6849 if (reverse_comparison)
6850 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6852 if (which_alternative == 0)
6854 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6857 /* If this is a long branch with its delay slot unfilled, set `nullify'
6858 as it can nullify the delay slot and save a nop. */
6859 if (length == 8 && dbr_sequence_length () == 0)
6862 /* If this is a short forward conditional branch which did not get
6863 its delay slot filled, the delay slot can still be nullified. */
6864 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6865 nullify = forward_branch_p (insn);
6871 return "movb,%C2,n %1,%0,%3";
6873 return "movb,%C2 %1,%0,%3";
6876 /* Handle weird backwards branch with a filled delay slot
6877 which is nullified. */
6878 if (dbr_sequence_length () != 0
6879 && ! forward_branch_p (insn)
6881 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6883 /* Handle short backwards branch with an unfilled delay slot.
6884 Using a movb;nop rather than or;bl saves 1 cycle for both
6885 taken and untaken branches. */
6886 else if (dbr_sequence_length () == 0
6887 && ! forward_branch_p (insn)
6888 && INSN_ADDRESSES_SET_P ()
6889 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6890 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6891 return "movb,%C2 %1,%0,%3%#";
6892 /* Handle normal cases. */
6894 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6896 return "or,%N2 %1,%%r0,%0\n\tb %3";
6899 /* The reversed conditional branch must branch over one additional
6900 instruction if the delay slot is filled and needs to be extracted
6901 by output_lbranch. If the delay slot is empty or this is a
6902 nullified forward branch, the instruction after the reversed
6903 condition branch must be nullified. */
6904 if (dbr_sequence_length () == 0
6905 || (nullify && forward_branch_p (insn)))
6909 operands[4] = GEN_INT (length);
6914 operands[4] = GEN_INT (length + 4);
6918 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
6920 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
6922 return output_lbranch (operands[3], insn, xdelay);
6925 /* Deal with gross reload for FP destination register case. */
6926 else if (which_alternative == 1)
6928 /* Move source register to MEM, perform the branch test, then
6929 finally load the FP register from MEM from within the branch's delay slot. */
6931 output_asm_insn ("stw %1,-16(%%r30)", operands);
6933 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6934 else if (length == 16)
6935 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6938 operands[4] = GEN_INT (length - 4);
6939 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
6940 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6941 return output_lbranch (operands[3], insn, 0);
6944 /* Deal with gross reload from memory case. */
6945 else if (which_alternative == 2)
6947 /* Reload loop counter from memory, the store back to memory
6948 happens in the branch's delay slot. */
6950 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6951 else if (length == 12)
6952 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6955 operands[4] = GEN_INT (length);
6956 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
6958 return output_lbranch (operands[3], insn, 0);
6961 /* Handle SAR as a destination. */
6965 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6966 else if (length == 12)
6967 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6970 operands[4] = GEN_INT (length);
6971 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
6973 return output_lbranch (operands[3], insn, 0);
6978 /* Copy any FP arguments in INSN into integer registers. */
6980 copy_fp_args (rtx insn)
6985 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6987 int arg_mode, regno;
6988 rtx use = XEXP (link, 0);
6990 if (! (GET_CODE (use) == USE
6991 && GET_CODE (XEXP (use, 0)) == REG
6992 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6995 arg_mode = GET_MODE (XEXP (use, 0));
6996 regno = REGNO (XEXP (use, 0));
6998 /* Is it a floating point register? */
6999 if (regno >= 32 && regno <= 39)
7001 /* Copy the FP register into an integer register via memory. */
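	  /* The general register chosen below follows the argument register
	     mapping: the FP argument registers (GCC regnos 32-39) correspond
	     pairwise to %r26..%r23, hence 26 - (regno - 32) / 2 for the
	     single-word case and 25 - (regno - 34) / 2 for the double-word
	     case.  The copy is staged through the slot at -16(%r30).  */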
7002 if (arg_mode == SFmode)
7004 xoperands[0] = XEXP (use, 0);
7005 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7006 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7007 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7011 xoperands[0] = XEXP (use, 0);
7012 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7013 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7014 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7015 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7021 /* Compute length of the FP argument copy sequence for INSN. */
7023 length_fp_args (rtx insn)
7028 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7030 int arg_mode, regno;
7031 rtx use = XEXP (link, 0);
7033 if (! (GET_CODE (use) == USE
7034 && GET_CODE (XEXP (use, 0)) == REG
7035 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7038 arg_mode = GET_MODE (XEXP (use, 0));
7039 regno = REGNO (XEXP (use, 0));
7041 /* Is it a floating point register? */
7042 if (regno >= 32 && regno <= 39)
7044 if (arg_mode == SFmode)
7054 /* Return the attribute length for the millicode call instruction INSN.
7055 The length must match the code generated by output_millicode_call.
7056 We include the delay slot in the returned length as it is better to
7057 overestimate the length than to underestimate it. */
7060 attr_length_millicode_call (rtx insn)
7062 unsigned long distance = -1;
7063 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
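  /* DISTANCE below is a worst-case estimate of how far this call might
     have to reach.  The constants it is compared against appear to be
     conservative bounds: 240000 stays inside the roughly +/-256kB reach
     of the 17-bit pc-relative branch, and 7600000 inside the roughly
     +/-8MB reach of the PA 2.0 22-bit form.  */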
7065 if (INSN_ADDRESSES_SET_P ())
7067 distance = (total + insn_current_reference_address (insn));
7068 if (distance < total)
7074 if (!TARGET_LONG_CALLS && distance < 7600000)
7079 else if (TARGET_PORTABLE_RUNTIME)
7083 if (!TARGET_LONG_CALLS && distance < 240000)
7086 if (TARGET_LONG_ABS_CALL && !flag_pic)
7093 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7096 CALL_DEST is the routine we are calling. */
7099 output_millicode_call (rtx insn, rtx call_dest)
7101 int attr_length = get_attr_length (insn);
7102 int seq_length = dbr_sequence_length ();
7107 xoperands[0] = call_dest;
7108 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7110 /* Handle the common case where we are sure that the branch will
7111 reach the beginning of the $CODE$ subspace. The within reach
7112 form of the $$sh_func_adrs call has a length of 28. Because
7113 it has an attribute type of multi, it never has a nonzero
7114 sequence length. The length of the $$sh_func_adrs is the same
7115 as certain out of reach PIC calls to other routines. */
7116 if (!TARGET_LONG_CALLS
7117 && ((seq_length == 0
7118 && (attr_length == 12
7119 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7120 || (seq_length != 0 && attr_length == 8)))
7122 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7128 /* It might seem that one insn could be saved by accessing
7129 the millicode function using the linkage table. However,
7130 this doesn't work in shared libraries and other dynamically
7131 loaded objects. Using a pc-relative sequence also avoids
7132 problems related to the implicit use of the gp register. */
7133 output_asm_insn ("b,l .+8,%%r1", xoperands);
7137 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7138 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7142 xoperands[1] = gen_label_rtx ();
7143 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7144 targetm.asm_out.internal_label (asm_out_file, "L",
7145 CODE_LABEL_NUMBER (xoperands[1]));
7146 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7149 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7151 else if (TARGET_PORTABLE_RUNTIME)
7153 /* Pure portable runtime doesn't allow be/ble; we also don't
7154 have PIC support in the assembler/linker, so this sequence is needed. */
7157 /* Get the address of our target into %r1. */
7158 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7159 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7161 /* Get our return address into %r31. */
7162 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7163 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7165 /* Jump to our target address in %r1. */
7166 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7170 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7172 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7174 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7178 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7179 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7181 if (TARGET_SOM || !TARGET_GAS)
7183 /* The HP assembler can generate relocations for the
7184 difference of two symbols. GAS can do this for a
7185 millicode symbol but not an arbitrary external
7186 symbol when generating SOM output. */
7187 xoperands[1] = gen_label_rtx ();
7188 targetm.asm_out.internal_label (asm_out_file, "L",
7189 CODE_LABEL_NUMBER (xoperands[1]));
7190 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7191 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7195 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7196 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7200 /* Jump to our target address in %r1. */
7201 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7205 if (seq_length == 0)
7206 output_asm_insn ("nop", xoperands);
7208 /* We are done if there isn't a jump in the delay slot. */
7209 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7212 /* This call has an unconditional jump in its delay slot. */
7213 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7215 /* See if the return address can be adjusted. Use the containing
7216 sequence insn's address. */
7217 if (INSN_ADDRESSES_SET_P ())
7219 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7220 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7221 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7223 if (VAL_14_BITS_P (distance))
7225 xoperands[1] = gen_label_rtx ();
7226 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7227 targetm.asm_out.internal_label (asm_out_file, "L",
7228 CODE_LABEL_NUMBER (xoperands[1]));
7231 /* ??? This branch may not reach its target. */
7232 output_asm_insn ("nop\n\tb,n %0", xoperands);
7235 /* ??? This branch may not reach its target. */
7236 output_asm_insn ("nop\n\tb,n %0", xoperands);
7238 /* Delete the jump. */
7239 SET_INSN_DELETED (NEXT_INSN (insn));
7244 /* Return the attribute length of the call instruction INSN. The SIBCALL
7245 flag indicates whether INSN is a regular call or a sibling call. The
7246 length returned must be longer than the code actually generated by
7247 output_call. Since branch shortening is done before delay branch
7248 sequencing, there is no way to determine whether or not the delay
7249 slot will be filled during branch shortening. Even when the delay
7250 slot is filled, we may have to add a nop if the delay slot contains
7251 a branch that can't reach its target. Thus, we always have to include
7252 the delay slot in the length estimate. This used to be done in
7253 pa_adjust_insn_length but we do it here now as some sequences always
7254 fill the delay slot and we can save four bytes in the estimate for these sequences. */
7258 attr_length_call (rtx insn, int sibcall)
7264 rtx pat = PATTERN (insn);
7265 unsigned long distance = -1;
7267 if (INSN_ADDRESSES_SET_P ())
7269 unsigned long total;
7271 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7272 distance = (total + insn_current_reference_address (insn));
7273 if (distance < total)
7277 /* Determine if this is a local call. */
7278 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7279 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7281 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7283 call_decl = SYMBOL_REF_DECL (call_dest);
7284 local_call = call_decl && targetm.binds_local_p (call_decl);
7286 /* pc-relative branch. */
7287 if (!TARGET_LONG_CALLS
7288 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7289 || distance < 240000))
7292 /* 64-bit plabel sequence. */
7293 else if (TARGET_64BIT && !local_call)
7294 length += sibcall ? 28 : 24;
7296 /* non-pic long absolute branch sequence. */
7297 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7300 /* long pc-relative branch sequence. */
7301 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7302 || (TARGET_64BIT && !TARGET_GAS)
7303 || (TARGET_GAS && !TARGET_SOM
7304 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7308 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7312 /* 32-bit plabel sequence. */
7318 length += length_fp_args (insn);
7328 if (!TARGET_NO_SPACE_REGS)
7336 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7339 CALL_DEST is the routine we are calling. */
7342 output_call (rtx insn, rtx call_dest, int sibcall)
7344 int delay_insn_deleted = 0;
7345 int delay_slot_filled = 0;
7346 int seq_length = dbr_sequence_length ();
7347 tree call_decl = SYMBOL_REF_DECL (call_dest);
7348 int local_call = call_decl && targetm.binds_local_p (call_decl);
7351 xoperands[0] = call_dest;
7353 /* Handle the common case where we're sure that the branch will reach
7354 the beginning of the "$CODE$" subspace. This is the beginning of
7355 the current function if we are in a named section. */
7356 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7358 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7359 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7363 if (TARGET_64BIT && !local_call)
7365 /* ??? As far as I can tell, the HP linker doesn't support the
7366 long pc-relative sequence described in the 64-bit runtime
7367 architecture. So, we use a slightly longer indirect call. */
7368 xoperands[0] = get_deferred_plabel (call_dest);
7369 xoperands[1] = gen_label_rtx ();
7371 /* If this isn't a sibcall, we put the load of %r27 into the
7372 delay slot. We can't do this in a sibcall as we don't
7373 have a second call-clobbered scratch register available. */
7375 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7378 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7381 /* Now delete the delay insn. */
7382 SET_INSN_DELETED (NEXT_INSN (insn));
7383 delay_insn_deleted = 1;
7386 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7387 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7388 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7392 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7393 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7394 output_asm_insn ("bve (%%r1)", xoperands);
7398 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7399 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7400 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7401 delay_slot_filled = 1;
7406 int indirect_call = 0;
7408 /* Emit a long call. There are several different sequences
7409 of increasing length and complexity. In most cases,
7410 they don't allow an instruction in the delay slot. */
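      /* The long-call sequences handled below, in order: a non-PIC long
	 absolute call (ldil/be), the SOM symbol-difference pc-relative
	 sequence, the GAS $PIC_pcrel$0 pc-relative sequence, and finally
	 an inline plabel-based sequence modelled on $$dyncall.  */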
7411 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7412 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7413 && !(TARGET_GAS && !TARGET_SOM
7414 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7419 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7421 && (!TARGET_PA_20 || indirect_call))
7423 /* A non-jump insn in the delay slot. By definition we can
7424 emit this insn before the call (and in fact before argument relocating). */
7426 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7429 /* Now delete the delay insn. */
7430 SET_INSN_DELETED (NEXT_INSN (insn));
7431 delay_insn_deleted = 1;
7434 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7436 /* This is the best sequence for making long calls in
7437 non-pic code. Unfortunately, GNU ld doesn't provide
7438 the stub needed for external calls, and GAS's support
7439 for this with the SOM linker is buggy. It is safe
7440 to use this for local calls. */
7441 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7443 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7447 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7450 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7452 output_asm_insn ("copy %%r31,%%r2", xoperands);
7453 delay_slot_filled = 1;
7458 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7459 || (TARGET_64BIT && !TARGET_GAS))
7461 /* The HP assembler and linker can handle relocations
7462 for the difference of two symbols. GAS and the HP
7463 linker can't do this when one of the symbols is external. */
7465 xoperands[1] = gen_label_rtx ();
7466 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7467 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7468 targetm.asm_out.internal_label (asm_out_file, "L",
7469 CODE_LABEL_NUMBER (xoperands[1]));
7470 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7472 else if (TARGET_GAS && !TARGET_SOM
7473 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7475 /* GAS currently can't generate the relocations that
7476 are needed for the SOM linker under HP-UX using this
7477 sequence. The GNU linker doesn't generate the stubs
7478 that are needed for external calls on TARGET_ELF32
7479 with this sequence. For now, we have to use a
7480 longer plabel sequence when using GAS. */
7481 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7482 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7484 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7489 /* Emit a long plabel-based call sequence. This is
7490 essentially an inline implementation of $$dyncall.
7491 We don't actually try to call $$dyncall as this is
7492 as difficult as calling the function itself. */
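	  /* xoperands[0] is the deferred plabel created for CALL_DEST; the
	     instructions below load its value, going through the linkage
	     table via %r19 when generating PIC and relative to $global$
	     via %r27 otherwise.  */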
7493 xoperands[0] = get_deferred_plabel (call_dest);
7494 xoperands[1] = gen_label_rtx ();
7496 /* Since the call is indirect, FP arguments in registers
7497 need to be copied to the general registers. Then, the
7498 argument relocation stub will copy them back. */
7500 copy_fp_args (insn);
7504 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7505 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7506 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7510 output_asm_insn ("addil LR'%0-$global$,%%r27",
7512 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7516 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7517 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7518 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7519 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7521 if (!sibcall && !TARGET_PA_20)
7523 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7524 if (TARGET_NO_SPACE_REGS)
7525 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7527 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7534 output_asm_insn ("bve (%%r1)", xoperands);
7539 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7540 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7541 delay_slot_filled = 1;
7544 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7549 if (!TARGET_NO_SPACE_REGS)
7550 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7555 if (TARGET_NO_SPACE_REGS)
7556 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7558 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7562 if (TARGET_NO_SPACE_REGS)
7563 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7565 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7568 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7570 output_asm_insn ("copy %%r31,%%r2", xoperands);
7571 delay_slot_filled = 1;
7578 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7579 output_asm_insn ("nop", xoperands);
7581 /* We are done if there isn't a jump in the delay slot. */
7583 || delay_insn_deleted
7584 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7587 /* A sibcall should never have a branch in the delay slot. */
7588 gcc_assert (!sibcall);
7590 /* This call has an unconditional jump in its delay slot. */
7591 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7593 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7595 /* See if the return address can be adjusted. Use the containing
7596 sequence insn's address. */
7597 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7598 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7599 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7601 if (VAL_14_BITS_P (distance))
7603 xoperands[1] = gen_label_rtx ();
7604 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7605 targetm.asm_out.internal_label (asm_out_file, "L",
7606 CODE_LABEL_NUMBER (xoperands[1]));
7609 output_asm_insn ("nop\n\tb,n %0", xoperands);
7612 output_asm_insn ("b,n %0", xoperands);
7614 /* Delete the jump. */
7615 SET_INSN_DELETED (NEXT_INSN (insn));
7620 /* Return the attribute length of the indirect call instruction INSN.
7621 The length must match the code generated by output_indirect_call.
7622 The returned length includes the delay slot. Currently, the delay
7623 slot of an indirect call sequence is not exposed and it is used by
7624 the sequence itself. */
7627 attr_length_indirect_call (rtx insn)
7629 unsigned long distance = -1;
7630 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7632 if (INSN_ADDRESSES_SET_P ())
7634 distance = (total + insn_current_reference_address (insn));
7635 if (distance < total)
7642 if (TARGET_FAST_INDIRECT_CALLS
7643 || (!TARGET_PORTABLE_RUNTIME
7644 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7645 || distance < 240000)))
7651 if (TARGET_PORTABLE_RUNTIME)
7654 /* Out of reach, can use ble. */
7659 output_indirect_call (rtx insn, rtx call_dest)
7665 xoperands[0] = call_dest;
7666 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7667 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7671 /* First the special case for kernels, level 0 systems, etc. */
7672 if (TARGET_FAST_INDIRECT_CALLS)
7673 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7675 /* Now the normal case -- we can reach $$dyncall directly or
7676 we're sure that we can get there via a long-branch stub.
7678 No need to check target flags as the length uniquely identifies
7679 the remaining cases. */
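  /* The length computed by attr_length_indirect_call identifies the
     sequence: 8 bytes means a direct bl/b,l to $$dyncall, 12 the absolute
     ldil/ble sequence, 20 the portable runtime sequence, and anything
     longer the long PIC sequence emitted at the end.  */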
7680 if (attr_length_indirect_call (insn) == 8)
7682 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7683 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7684 variant of the B,L instruction can't be used on the SOM target. */
7685 if (TARGET_PA_20 && !TARGET_SOM)
7686 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7688 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7691 /* Long millicode call, but we are not generating PIC or portable runtime code. */
7693 if (attr_length_indirect_call (insn) == 12)
7694 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7696 /* Long millicode call for portable runtime. */
7697 if (attr_length_indirect_call (insn) == 20)
7698 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7700 /* We need a long PIC call to $$dyncall. */
7701 xoperands[0] = NULL_RTX;
7702 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7703 if (TARGET_SOM || !TARGET_GAS)
7705 xoperands[0] = gen_label_rtx ();
7706 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7707 targetm.asm_out.internal_label (asm_out_file, "L",
7708 CODE_LABEL_NUMBER (xoperands[0]));
7709 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7713 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7714 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7717 output_asm_insn ("blr %%r0,%%r2", xoperands);
7718 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7722 /* Return the total length of the save and restore instructions needed for
7723 the data linkage table pointer (i.e., the PIC register) across the call
7724 instruction INSN. No-return calls do not require a save and restore.
7725 In addition, we may be able to avoid the save and restore for calls
7726 within the same translation unit. */
7729 attr_length_save_restore_dltp (rtx insn)
7731 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7737 /* In HPUX 8.0's shared library scheme, special relocations are needed
7738 for function labels if they might be passed to a function
7739 in a shared library (because shared libraries don't live in code
7740 space), and special magic is needed to construct their address. */
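/* The encoding below simply prefixes the assembler name with '@';
   FUNCTION_NAME_P tests for that prefix and pa_strip_name_encoding
   removes it again (along with the usual '*' user-label prefix).  */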
7743 hppa_encode_label (rtx sym)
7745 const char *str = XSTR (sym, 0);
7746 int len = strlen (str) + 1;
7749 p = newstr = alloca (len + 1);
7753 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7757 pa_encode_section_info (tree decl, rtx rtl, int first)
7759 default_encode_section_info (decl, rtl, first);
7761 if (first && TEXT_SPACE_P (decl))
7763 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7764 if (TREE_CODE (decl) == FUNCTION_DECL)
7765 hppa_encode_label (XEXP (rtl, 0));
7769 /* This is sort of inverse to pa_encode_section_info. */
7772 pa_strip_name_encoding (const char *str)
7774 str += (*str == '@');
7775 str += (*str == '*');
7780 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7782 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7785 /* Returns 1 if OP is a function label involved in a simple addition
7786 with a constant. Used to keep certain patterns from matching
7787 during instruction combination. */
7789 is_function_label_plus_const (rtx op)
7791 /* Strip off any CONST. */
7792 if (GET_CODE (op) == CONST)
7795 return (GET_CODE (op) == PLUS
7796 && function_label_operand (XEXP (op, 0), Pmode)
7797 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7800 /* Output assembly code for a thunk to FUNCTION. */
7803 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7804 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7807 static unsigned int current_thunk_number;
7808 int val_14 = VAL_14_BITS_P (delta);
7813 xoperands[0] = XEXP (DECL_RTL (function), 0);
7814 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7815 xoperands[2] = GEN_INT (delta);
7817 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7818 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7820 /* Output the thunk. We know that the function is in the same
7821 translation unit (i.e., the same space) as the thunk, and that
7822 thunks are output after their method. Thus, we don't need an
7823 external branch to reach the function. With SOM and GAS,
7824 functions and thunks are effectively in different sections.
7825 Thus, we can always use an IA-relative branch and the linker
7826 will add a long branch stub if necessary.
7828 However, we have to be careful when generating PIC code on the
7829 SOM port to ensure that the sequence does not transfer to an
7830 import stub for the target function as this could clobber the
7831 return value saved at SP-24. This would also apply to the
7832 32-bit linux port if the multi-space model is implemented. */
7833 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7834 && !(flag_pic && TREE_PUBLIC (function))
7835 && (TARGET_GAS || last_address < 262132))
7836 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7837 && ((targetm.have_named_sections
7838 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7839 /* The GNU 64-bit linker has rather poor stub management.
7840 So, we use a long branch from thunks that aren't in
7841 the same section as the target function. */
7843 && (DECL_SECTION_NAME (thunk_fndecl)
7844 != DECL_SECTION_NAME (function)))
7845 || ((DECL_SECTION_NAME (thunk_fndecl)
7846 == DECL_SECTION_NAME (function))
7847 && last_address < 262132)))
7848 || (!targetm.have_named_sections && last_address < 262132))))
7851 output_asm_insn ("addil L'%2,%%r26", xoperands);
7853 output_asm_insn ("b %0", xoperands);
7857 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7862 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7866 else if (TARGET_64BIT)
7868 /* We only have one call-clobbered scratch register, so we can't
7869 make use of the delay slot if delta doesn't fit in 14 bits. */
7872 output_asm_insn ("addil L'%2,%%r26", xoperands);
7873 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7876 output_asm_insn ("b,l .+8,%%r1", xoperands);
7880 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7881 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7885 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7886 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7891 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7892 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7897 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7901 else if (TARGET_PORTABLE_RUNTIME)
7903 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7904 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7907 output_asm_insn ("addil L'%2,%%r26", xoperands);
7909 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7913 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7918 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7922 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7924 /* The function is accessible from outside this module. The only
7925 way to avoid an import stub between the thunk and function is to
7926 call the function directly with an indirect sequence similar to
7927 that used by $$dyncall. This is possible because $$dyncall acts
7928 as the import stub in an indirect call. */
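  /* As in $$dyncall, the bb,>=,n below tests bit 30 of the loaded word:
     when the bit is set, %r22 holds a plabel (function descriptor), so
     its low bits are cleared and the real entry point and linkage table
     pointer are loaded from the descriptor; when the bit is clear those
     instructions are skipped.  */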
7929 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7930 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7931 output_asm_insn ("addil LT'%3,%%r19", xoperands);
7932 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7933 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7934 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7935 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7936 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7937 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7941 output_asm_insn ("addil L'%2,%%r26", xoperands);
7947 output_asm_insn ("bve (%%r22)", xoperands);
7950 else if (TARGET_NO_SPACE_REGS)
7952 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
7957 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7958 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7959 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
7964 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7966 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7970 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7972 if (TARGET_SOM || !TARGET_GAS)
7974 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
7975 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
7979 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7980 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
7984 output_asm_insn ("addil L'%2,%%r26", xoperands);
7986 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7990 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7995 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8002 output_asm_insn ("addil L'%2,%%r26", xoperands);
8004 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8005 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8009 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8014 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8019 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8021 if (TARGET_SOM && TARGET_GAS)
8023 /* We're done with this subspace except possibly for some additional
8024 debug information. Forget that we are in this subspace to ensure
8025 that the next function is output in its own subspace. */
8027 cfun->machine->in_nsubspa = 2;
8030 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8032 switch_to_section (data_section);
8033 output_asm_insn (".align 4", xoperands);
8034 ASM_OUTPUT_LABEL (file, label);
8035 output_asm_insn (".word P'%0", xoperands);
8038 current_thunk_number++;
8039 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8040 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8041 last_address += nbytes;
8042 update_total_code_bytes (nbytes);
8045 /* Only direct calls to static functions are allowed to be sibling (tail)
8048 This restriction is necessary because some linker generated stubs will
8049 store return pointers into rp' in some cases which might clobber a
8050 live value already in rp'.
8052 In a sibcall the current function and the target function share stack
8053 space. Thus if the path to the current function and the path to the
8054 target function save a value in rp', they save the value into the
8055 same stack slot, which has undesirable consequences.
8057 Because of the deferred binding nature of shared libraries any function
8058 with external scope could be in a different load module and thus require
8059 rp' to be saved when calling that function. So sibcall optimizations
8060 can only be safe for static functions.
8062 Note that GCC never needs return value relocations, so we don't have to
8063 worry about static calls with return value relocations (which require
8066 It is safe to perform a sibcall optimization when the target function
8067 will never return. */
8069 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8071 if (TARGET_PORTABLE_RUNTIME)
8074 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8075 single subspace mode and the call is not indirect. As far as I know,
8076 there is no operating system support for the multiple subspace mode.
8077 It might be possible to support indirect calls if we didn't use
8078 $$dyncall (see the indirect sequence generated in output_call). */
8080 return (decl != NULL_TREE);
8082 /* Sibcalls are not ok because the arg pointer register is not a fixed
8083 register. This prevents the sibcall optimization from occurring. In
8084 addition, there are problems with stub placement using GNU ld. This
8085 is because a normal sibcall branch uses a 17-bit relocation while
8086 a regular call branch uses a 22-bit relocation. As a result, more
8087 care needs to be taken in the placement of long-branch stubs. */
8091 /* Sibcalls are only ok within a translation unit. */
8092 return (decl && !TREE_PUBLIC (decl));
8095 /* ??? Addition is not commutative on the PA due to the weird implicit
8096 space register selection rules for memory addresses. Therefore, we
8097 don't consider a + b == b + a, as this might be inside a MEM. */
8099 pa_commutative_p (rtx x, int outer_code)
8101 return (COMMUTATIVE_P (x)
8102 && (TARGET_NO_SPACE_REGS
8103 || (outer_code != UNKNOWN && outer_code != MEM)
8104 || GET_CODE (x) != PLUS));
8107 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8108 use in fmpyadd instructions. */
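/* Operand roles assumed by the checks below: operands[0] is the multiply
   destination, operands[1] and [2] the multiply sources, and operands[3]
   the add destination, which must also be one of the add sources,
   operands[4] or [5].  */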
8110 fmpyaddoperands (rtx *operands)
8112 enum machine_mode mode = GET_MODE (operands[0]);
8114 /* Must be a floating point mode. */
8115 if (mode != SFmode && mode != DFmode)
8118 /* All modes must be the same. */
8119 if (! (mode == GET_MODE (operands[1])
8120 && mode == GET_MODE (operands[2])
8121 && mode == GET_MODE (operands[3])
8122 && mode == GET_MODE (operands[4])
8123 && mode == GET_MODE (operands[5])))
8126 /* All operands must be registers. */
8127 if (! (GET_CODE (operands[1]) == REG
8128 && GET_CODE (operands[2]) == REG
8129 && GET_CODE (operands[3]) == REG
8130 && GET_CODE (operands[4]) == REG
8131 && GET_CODE (operands[5]) == REG))
8134 /* Only 2 real operands to the addition. One of the input operands must
8135 be the same as the output operand. */
8136 if (! rtx_equal_p (operands[3], operands[4])
8137 && ! rtx_equal_p (operands[3], operands[5]))
8140 /* Inout operand of add cannot conflict with any operands from multiply. */
8141 if (rtx_equal_p (operands[3], operands[0])
8142 || rtx_equal_p (operands[3], operands[1])
8143 || rtx_equal_p (operands[3], operands[2]))
8146 /* multiply cannot feed into addition operands. */
8147 if (rtx_equal_p (operands[4], operands[0])
8148 || rtx_equal_p (operands[5], operands[0]))
8151 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8153 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8154 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8155 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8156 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8157 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8158 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8161 /* Passed. Operands are suitable for fmpyadd. */
8165 #if !defined(USE_COLLECT2)
8167 pa_asm_out_constructor (rtx symbol, int priority)
8169 if (!function_label_operand (symbol, VOIDmode))
8170 hppa_encode_label (symbol);
8172 #ifdef CTORS_SECTION_ASM_OP
8173 default_ctor_section_asm_out_constructor (symbol, priority);
8175 # ifdef TARGET_ASM_NAMED_SECTION
8176 default_named_section_asm_out_constructor (symbol, priority);
8178 default_stabs_asm_out_constructor (symbol, priority);
8184 pa_asm_out_destructor (rtx symbol, int priority)
8186 if (!function_label_operand (symbol, VOIDmode))
8187 hppa_encode_label (symbol);
8189 #ifdef DTORS_SECTION_ASM_OP
8190 default_dtor_section_asm_out_destructor (symbol, priority);
8192 # ifdef TARGET_ASM_NAMED_SECTION
8193 default_named_section_asm_out_destructor (symbol, priority);
8195 default_stabs_asm_out_destructor (symbol, priority);
8201 /* This function places uninitialized global data in the bss section.
8202 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8203 function on the SOM port to prevent uninitialized global data from
8204 being placed in the data section. */
8207 pa_asm_output_aligned_bss (FILE *stream,
8209 unsigned HOST_WIDE_INT size,
8212 switch_to_section (bss_section);
8213 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8215 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8216 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8219 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8220 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8223 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8224 ASM_OUTPUT_LABEL (stream, name);
8225 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8228 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8229 that doesn't allow the alignment of global common storage to be directly
8230 specified. The SOM linker aligns common storage based on the rounded
8231 value of the NUM_BYTES parameter in the .comm directive. It's not
8232 possible to use the .align directive as it doesn't affect the alignment
8233 of the label associated with a .comm directive. */
8236 pa_asm_output_aligned_common (FILE *stream,
8238 unsigned HOST_WIDE_INT size,
8241 unsigned int max_common_align;
8243 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8244 if (align > max_common_align)
8246 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8247 "for global common data. Using %u",
8248 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8249 align = max_common_align;
8252 switch_to_section (bss_section);
8254 assemble_name (stream, name);
8255 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8256 MAX (size, align / BITS_PER_UNIT));
8259 /* We can't use .comm for local common storage as the SOM linker effectively
8260 treats the symbol as universal and uses the same storage for local symbols
8261 with the same name in different object files. The .block directive
8262 reserves an uninitialized block of storage. However, it's not common
8263 storage. Fortunately, GCC never requests common storage with the same
8264 name in any given translation unit. */
8267 pa_asm_output_aligned_local (FILE *stream,
8269 unsigned HOST_WIDE_INT size,
8272 switch_to_section (bss_section);
8273 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8276 fprintf (stream, "%s", LOCAL_ASM_OP);
8277 assemble_name (stream, name);
8278 fprintf (stream, "\n");
8281 ASM_OUTPUT_LABEL (stream, name);
8282 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8285 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8286 use in fmpysub instructions. */
8288 fmpysuboperands (rtx *operands)
8290 enum machine_mode mode = GET_MODE (operands[0]);
8292 /* Must be a floating point mode. */
8293 if (mode != SFmode && mode != DFmode)
8296 /* All modes must be the same. */
8297 if (! (mode == GET_MODE (operands[1])
8298 && mode == GET_MODE (operands[2])
8299 && mode == GET_MODE (operands[3])
8300 && mode == GET_MODE (operands[4])
8301 && mode == GET_MODE (operands[5])))
8304 /* All operands must be registers. */
8305 if (! (GET_CODE (operands[1]) == REG
8306 && GET_CODE (operands[2]) == REG
8307 && GET_CODE (operands[3]) == REG
8308 && GET_CODE (operands[4]) == REG
8309 && GET_CODE (operands[5]) == REG))
8312 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8313 operation, so operands[4] must be the same as operands[3].
8314 if (! rtx_equal_p (operands[3], operands[4]))
8317 /* multiply cannot feed into subtraction. */
8318 if (rtx_equal_p (operands[5], operands[0]))
8321 /* Inout operand of sub cannot conflict with any operands from multiply. */
8322 if (rtx_equal_p (operands[3], operands[0])
8323 || rtx_equal_p (operands[3], operands[1])
8324 || rtx_equal_p (operands[3], operands[2]))
8327 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8329 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8330 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8331 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8332 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8333 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8334 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8337 /* Passed. Operands are suitable for fmpysub. */
8341 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8342 constants for shadd instructions. */
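/* shadd refers to the shift-and-add instructions (sh1add, sh2add, sh3add),
   which scale one operand by 2, 4 or 8 before adding -- hence the three
   values accepted below.  */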
8344 shadd_constant_p (int val)
8346 if (val == 2 || val == 4 || val == 8)
8352 /* Return 1 if OP is valid as a base or index register in a
8356 borx_reg_operand (rtx op, enum machine_mode mode)
8358 if (GET_CODE (op) != REG)
8361 /* We must reject virtual registers as the only expressions that
8362 can be instantiated are REG and REG+CONST. */
8363 if (op == virtual_incoming_args_rtx
8364 || op == virtual_stack_vars_rtx
8365 || op == virtual_stack_dynamic_rtx
8366 || op == virtual_outgoing_args_rtx
8367 || op == virtual_cfa_rtx)
8370 /* While it's always safe to index off the frame pointer, it's not
8371 profitable to do so when the frame pointer is being eliminated. */
8372 if (!reload_completed
8373 && flag_omit_frame_pointer
8374 && !current_function_calls_alloca
8375 && op == frame_pointer_rtx)
8378 return register_operand (op, mode);
8381 /* Return 1 if this operand is anything other than a hard register. */
8384 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8386 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8389 /* Return 1 if INSN branches forward. Should be using insn_addresses
8390 to avoid walking through all the insns... */
8392 forward_branch_p (rtx insn)
8394 rtx label = JUMP_LABEL (insn);
8401 insn = NEXT_INSN (insn);
8404 return (insn == label);
8407 /* Return 1 if OP is an equality comparison, else return 0. */
8409 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8411 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8414 /* Return 1 if INSN is in the delay slot of a call instruction. */
8416 jump_in_call_delay (rtx insn)
8419 if (GET_CODE (insn) != JUMP_INSN)
8422 if (PREV_INSN (insn)
8423 && PREV_INSN (PREV_INSN (insn))
8424 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8426 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8428 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8429 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8436 /* Output an unconditional move and branch insn. */
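/* These parallel move-and-branch (and the add-and-branch further below)
   insns are created by pa_combine_instructions.  The %I1 modifier in the
   templates selects the immediate form when operand 1 is a constant,
   e.g. movib/addi rather than movb/add.  */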
8439 output_parallel_movb (rtx *operands, rtx insn)
8441 int length = get_attr_length (insn);
8443 /* These are the cases in which we win. */
8445 return "mov%I1b,tr %1,%0,%2";
8447 /* None of the following cases win, but they don't lose either. */
8450 if (dbr_sequence_length () == 0)
8452 /* Nothing in the delay slot, fake it by putting the combined
8453 insn (the copy or add) in the delay slot of a bl. */
8454 if (GET_CODE (operands[1]) == CONST_INT)
8455 return "b %2\n\tldi %1,%0";
8457 return "b %2\n\tcopy %1,%0";
8461 /* Something in the delay slot, but we've got a long branch. */
8462 if (GET_CODE (operands[1]) == CONST_INT)
8463 return "ldi %1,%0\n\tb %2";
8465 return "copy %1,%0\n\tb %2";
8469 if (GET_CODE (operands[1]) == CONST_INT)
8470 output_asm_insn ("ldi %1,%0", operands);
8472 output_asm_insn ("copy %1,%0", operands);
8473 return output_lbranch (operands[2], insn, 1);
8476 /* Output an unconditional add and branch insn. */
8479 output_parallel_addb (rtx *operands, rtx insn)
8481 int length = get_attr_length (insn);
8483 /* To make life easy we want operand0 to be the shared input/output
8484 operand and operand1 to be the readonly operand. */
8485 if (operands[0] == operands[1])
8486 operands[1] = operands[2];
8488 /* These are the cases in which we win. */
8490 return "add%I1b,tr %1,%0,%3";
8492 /* None of the following cases win, but they don't lose either. */
8495 if (dbr_sequence_length () == 0)
8496 /* Nothing in the delay slot, fake it by putting the combined
8497 insn (the copy or add) in the delay slot of a bl. */
8498 return "b %3\n\tadd%I1 %1,%0,%0";
8500 /* Something in the delay slot, but we've got a long branch. */
8501 return "add%I1 %1,%0,%0\n\tb %3";
8504 output_asm_insn ("add%I1 %1,%0,%0", operands);
8505 return output_lbranch (operands[3], insn, 1);
8508 /* Return nonzero if INSN (a jump insn) immediately follows a call
8509 to a named function. This is used to avoid filling the delay slot
8510 of the jump since it can usually be eliminated by modifying RP in
8511 the delay slot of the call. */
8514 following_call (rtx insn)
8516 if (! TARGET_JUMP_IN_DELAY)
8519 /* Find the previous real insn, skipping NOTEs. */
8520 insn = PREV_INSN (insn);
8521 while (insn && GET_CODE (insn) == NOTE)
8522 insn = PREV_INSN (insn);
8524 /* Check for CALL_INSNs and millicode calls. */
8526 && ((GET_CODE (insn) == CALL_INSN
8527 && get_attr_type (insn) != TYPE_DYNCALL)
8528 || (GET_CODE (insn) == INSN
8529 && GET_CODE (PATTERN (insn)) != SEQUENCE
8530 && GET_CODE (PATTERN (insn)) != USE
8531 && GET_CODE (PATTERN (insn)) != CLOBBER
8532 && get_attr_type (insn) == TYPE_MILLI)))
8538 /* We use this hook to perform a PA specific optimization which is difficult
8539 to do in earlier passes.
8541 We want the delay slots of branches within jump tables to be filled.
8542 None of the compiler passes at the moment even has the notion that a
8543 PA jump table doesn't contain addresses, but instead contains actual instructions.
8546 Because we actually jump into the table, the addresses of each entry
8547 must stay constant in relation to the beginning of the table (which
8548 itself must stay constant relative to the instruction to jump into
8549 it). I don't believe we can guarantee earlier passes of the compiler
8550 will adhere to those rules.
8552 So, late in the compilation process we find all the jump tables, and
8553 expand them into real code -- e.g. each entry in the jump table vector
8554 will get an appropriate label followed by a jump to the final target.
8556 Reorg and the final jump pass can then optimize these branches and
8557 fill their delay slots. We end up with smaller, more efficient code.
8559 The jump instructions within the table are special; we must be able
8560 to identify them during assembly output (if the jumps don't get filled
8561 we need to emit a nop rather than nullifying the delay slot).  We
8562 identify jumps in switch tables by using insns with the attribute
8563 type TYPE_BTABLE_BRANCH.
8565 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8566 insns.  This serves two purposes: first, it prevents jump.c from
8567 noticing that the last N entries in the table jump to the instruction
8568 immediately after the table and deleting the jumps. Second, those
8569 insns mark where we should emit .begin_brtab and .end_brtab directives
8570 when using GAS (allows for better link time optimizations). */
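/* Rough sketch of the transformation described above (labels and
   targets are invented; the exact assembly depends on the table):

     before (ADDR_VEC, printed as data)    after (real code)

     L$tbl:                                L$tbl:
        .word L$case0                      L$exp0:  b L$case0
        .word L$case1                               nop
                                           L$exp1:  b L$case1
                                                    nop

   Each table slot becomes a label plus a short jump, so the table can
   be branched into directly and later passes can fill the delay slots
   of those jumps.  */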
8577 remove_useless_addtr_insns (1);
8579 if (pa_cpu < PROCESSOR_8000)
8580 pa_combine_instructions ();
8583 /* This is fairly cheap, so always run it if optimizing. */
8584 if (optimize > 0 && !TARGET_BIG_SWITCH)
8586 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8587 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8589 rtx pattern, tmp, location, label;
8590 unsigned int length, i;
8592 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8593 if (GET_CODE (insn) != JUMP_INSN
8594 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8595 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8598 /* Emit marker for the beginning of the branch table. */
8599 emit_insn_before (gen_begin_brtab (), insn);
8601 pattern = PATTERN (insn);
8602 location = PREV_INSN (insn);
8603 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8605 for (i = 0; i < length; i++)
8607 /* Emit a label before each jump to keep jump.c from
8608 removing this code. */
8609 tmp = gen_label_rtx ();
8610 LABEL_NUSES (tmp) = 1;
8611 emit_label_after (tmp, location);
8612 location = NEXT_INSN (location);
8614 if (GET_CODE (pattern) == ADDR_VEC)
8615 label = XEXP (XVECEXP (pattern, 0, i), 0);
8617 label = XEXP (XVECEXP (pattern, 1, i), 0);
8619 tmp = gen_short_jump (label);
8621 /* Emit the jump itself. */
8622 tmp = emit_jump_insn_after (tmp, location);
8623 JUMP_LABEL (tmp) = label;
8624 LABEL_NUSES (label)++;
8625 location = NEXT_INSN (location);
8627 /* Emit a BARRIER after the jump. */
8628 emit_barrier_after (location);
8629 location = NEXT_INSN (location);
8632 /* Emit marker for the end of the branch table. */
8633 emit_insn_before (gen_end_brtab (), location);
8634 location = NEXT_INSN (location);
8635 emit_barrier_after (location);
8637 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8643 /* Still need brtab marker insns. FIXME: the presence of these
8644 markers disables output of the branch table to readonly memory,
8645 and any alignment directives that might be needed. Possibly,
8646 the begin_brtab insn should be output before the label for the
8647 table. This doesn't matter at the moment since the tables are
8648 always output in the text section. */
8649 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8651 /* Find an ADDR_VEC insn. */
8652 if (GET_CODE (insn) != JUMP_INSN
8653 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8654 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8657 /* Now generate markers for the beginning and end of the branch table.  */
8659 emit_insn_before (gen_begin_brtab (), insn);
8660 emit_insn_after (gen_end_brtab (), insn);
8665 /* The PA has a number of odd instructions which can perform multiple
8666 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8667 it may be profitable to combine two instructions into one instruction
8668 with two outputs.  It's not profitable on PA2.0 machines because the
8669 two outputs would take two slots in the reorder buffers.
8671 This routine finds instructions which can be combined and combines
8672 them. We only support some of the potential combinations, and we
8673 only try common ways to find suitable instructions.
8675 * addb can add two registers or a register and a small integer
8676 and jump to a nearby (+-8k) location. Normally the jump to the
8677 nearby location is conditional on the result of the add, but by
8678 using the "true" condition we can make the jump unconditional.
8679 Thus addb can perform two independent operations in one insn.
8681 * movb is similar to addb in that it can perform a reg->reg
8682 or small immediate->reg copy and jump to a nearby (+-8k) location.
8684 * fmpyadd and fmpysub can perform a FP multiply and either an
8685 FP add or FP sub if the operands of the multiply and add/sub are
8686 independent (there are other minor restrictions). Note both
8687 the fmpy and fadd/fsub can in theory move to better spots according
8688 to data dependencies, but for now we require that the fmpy stay at a fixed point.
8691 * Many of the memory operations can perform pre & post updates
8692 of index registers. GCC's pre/post increment/decrement addressing
8693 is far too simple to take advantage of all the possibilities. This
8694 pass may not be suitable since those insns may not be independent.
8696 * comclr can compare two ints or an int and a register, nullify
8697 the following instruction and zero some other register. This
8698 is more difficult to use as it's harder to find an insn which
8699 will generate a comclr than finding something like an unconditional
8700 branch. (conditional moves & long branches create comclr insns).
8702 * Most arithmetic operations can conditionally skip the next
8703 instruction. They can be viewed as "perform this operation
8704 and conditionally jump to this nearby location" (where nearby
8705 is an insn away).  These are difficult to use due to the
8706 branch length restrictions. */
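/* A concrete (illustrative) example of the simplest combination this
   pass performs: an unconditional branch "anchor" preceded by an
   independent register copy "floater",

	copy %r4,%r5
	b L$0040

   can be rewritten as the single instruction

	movb,tr %r4,%r5,L$0040

   using the always-true condition so the branch is taken
   unconditionally; this is the template emitted by
   output_parallel_movb above.  Register numbers and the label are
   made up.  */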
8709 pa_combine_instructions (void)
8713 /* This can get expensive since the basic algorithm is on the
8714 order of O(n^2) (or worse). Only do it for -O2 or higher
8715 levels of optimization. */
8719 /* Walk down the list of insns looking for "anchor" insns which
8720 may be combined with "floating" insns. As the name implies,
8721 "anchor" instructions don't move, while "floating" insns may
8723 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8724 new = make_insn_raw (new);
8726 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8728 enum attr_pa_combine_type anchor_attr;
8729 enum attr_pa_combine_type floater_attr;
8731 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8732 Also ignore any special USE insns. */
8733 if ((GET_CODE (anchor) != INSN
8734 && GET_CODE (anchor) != JUMP_INSN
8735 && GET_CODE (anchor) != CALL_INSN)
8736 || GET_CODE (PATTERN (anchor)) == USE
8737 || GET_CODE (PATTERN (anchor)) == CLOBBER
8738 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8739 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8742 anchor_attr = get_attr_pa_combine_type (anchor);
8743 /* See if anchor is an insn suitable for combination. */
8744 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8745 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8746 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8747 && ! forward_branch_p (anchor)))
8751 for (floater = PREV_INSN (anchor);
8753 floater = PREV_INSN (floater))
8755 if (GET_CODE (floater) == NOTE
8756 || (GET_CODE (floater) == INSN
8757 && (GET_CODE (PATTERN (floater)) == USE
8758 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8761 /* Anything except a regular INSN will stop our search. */
8762 if (GET_CODE (floater) != INSN
8763 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8764 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8770 /* See if FLOATER is suitable for combination with the anchor.  */
8772 floater_attr = get_attr_pa_combine_type (floater);
8773 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8774 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8775 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8776 && floater_attr == PA_COMBINE_TYPE_FMPY))
8778 /* If ANCHOR and FLOATER can be combined, then we're
8779 done with this pass. */
8780 if (pa_can_combine_p (new, anchor, floater, 0,
8781 SET_DEST (PATTERN (floater)),
8782 XEXP (SET_SRC (PATTERN (floater)), 0),
8783 XEXP (SET_SRC (PATTERN (floater)), 1)))
8787 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8788 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8790 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8792 if (pa_can_combine_p (new, anchor, floater, 0,
8793 SET_DEST (PATTERN (floater)),
8794 XEXP (SET_SRC (PATTERN (floater)), 0),
8795 XEXP (SET_SRC (PATTERN (floater)), 1)))
8800 if (pa_can_combine_p (new, anchor, floater, 0,
8801 SET_DEST (PATTERN (floater)),
8802 SET_SRC (PATTERN (floater)),
8803 SET_SRC (PATTERN (floater))))
8809 /* If we didn't find anything on the backwards scan try forwards. */
8811 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8812 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8814 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8816 if (GET_CODE (floater) == NOTE
8817 || (GET_CODE (floater) == INSN
8818 && (GET_CODE (PATTERN (floater)) == USE
8819 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8823 /* Anything except a regular INSN will stop our search. */
8824 if (GET_CODE (floater) != INSN
8825 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8826 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8832 /* See if FLOATER is suitable for combination with the anchor.  */
8834 floater_attr = get_attr_pa_combine_type (floater);
8835 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8836 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8837 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8838 && floater_attr == PA_COMBINE_TYPE_FMPY))
8840 /* If ANCHOR and FLOATER can be combined, then we're
8841 done with this pass. */
8842 if (pa_can_combine_p (new, anchor, floater, 1,
8843 SET_DEST (PATTERN (floater)),
8844 XEXP (SET_SRC (PATTERN (floater)),
8846 XEXP (SET_SRC (PATTERN (floater)),
8853 /* FLOATER will be nonzero if we found a suitable floating
8854 insn for combination with ANCHOR. */
8856 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8857 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8859 /* Emit the new instruction and delete the old anchor. */
8860 emit_insn_before (gen_rtx_PARALLEL
8862 gen_rtvec (2, PATTERN (anchor),
8863 PATTERN (floater))),
8866 SET_INSN_DELETED (anchor);
8868 /* Emit a special USE insn for FLOATER, then delete
8869 the floating insn. */
8870 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8871 delete_insn (floater);
8876 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8879 /* Emit the new_jump instruction and delete the old anchor. */
8881 = emit_jump_insn_before (gen_rtx_PARALLEL
8883 gen_rtvec (2, PATTERN (anchor),
8884 PATTERN (floater))),
8887 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8888 SET_INSN_DELETED (anchor);
8890 /* Emit a special USE insn for FLOATER, then delete
8891 the floating insn. */
8892 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8893 delete_insn (floater);
8901 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8904 int insn_code_number;
8907 /* Create a PARALLEL with the patterns of ANCHOR and
8908 FLOATER, try to recognize it, then test constraints
8909 for the resulting pattern.
8911 If the pattern doesn't match or the constraints
8912 aren't met, keep searching for a suitable floater insn.  */
8914 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8915 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8916 INSN_CODE (new) = -1;
8917 insn_code_number = recog_memoized (new);
8918 if (insn_code_number < 0
8919 || (extract_insn (new), ! constrain_operands (1)))
8933 /* There are up to three operands to consider.  One
8934 output and two inputs.
8936 The output must not be used between FLOATER & ANCHOR
8937 exclusive. The inputs must not be set between
8938 FLOATER and ANCHOR exclusive. */
8940 if (reg_used_between_p (dest, start, end))
8943 if (reg_set_between_p (src1, start, end))
8946 if (reg_set_between_p (src2, start, end))
8949 /* If we get here, then everything is good. */
8953 /* Return nonzero if references for INSN are delayed.
8955 Millicode insns are actually function calls with some special
8956 constraints on arguments and register usage.
8958 Millicode calls always expect their arguments in the integer argument
8959 registers, and always return their result in %r29 (ret1). They
8960 are expected to clobber their arguments, %r1, %r29, and the return
8961 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8963 This function tells reorg that the references to arguments and
8964 millicode calls do not appear to happen until after the millicode call.
8965 This allows reorg to put insns which set the argument registers into the
8966 delay slot of the millicode call -- thus they act more like traditional CALL_INSNs.
8969 Note we cannot consider side effects of the insn to be delayed because
8970 the branch and link insn will clobber the return pointer. If we happened
8971 to use the return pointer in the delay slot of the call, then we lose.
8973 get_attr_type will try to recognize the given insn, so make sure to
8974 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns in this case.  */
8977 insn_refs_are_delayed (rtx insn)
8979 return ((GET_CODE (insn) == INSN
8980 && GET_CODE (PATTERN (insn)) != SEQUENCE
8981 && GET_CODE (PATTERN (insn)) != USE
8982 && GET_CODE (PATTERN (insn)) != CLOBBER
8983 && get_attr_type (insn) == TYPE_MILLI));
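/* Sketch of the effect this enables (illustrative assembly; $$mulI is
   one of the PA millicode routines, with arguments in %r26/%r25 and
   the result in %r29):

	ldi 7,%r25                      ldi 7,%r25
	ldi 3,%r26             ==>      bl $$mulI,%r31
	bl $$mulI,%r31                  ldi 3,%r26	; now in the delay slot
	nop

   Because insn_refs_are_delayed reports that the millicode call does
   not reference its arguments until after the call, reorg is free to
   move an argument setup insn into the call's delay slot.  */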
8986 /* On the HP-PA the value is found in register(s) 28(-29), unless
8987 the mode is SF or DF. Then the value is returned in fr4 (32).
8989 This must perform the same promotions as PROMOTE_MODE, else
8990 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8992 Small structures must be returned in a PARALLEL on PA64 in order
8993 to match the HP Compiler ABI. */
8996 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8998 enum machine_mode valmode;
9000 if (AGGREGATE_TYPE_P (valtype)
9001 || TREE_CODE (valtype) == COMPLEX_TYPE
9002 || TREE_CODE (valtype) == VECTOR_TYPE)
9006 /* Aggregates with a size less than or equal to 128 bits are
9007 returned in GR 28(-29). They are left justified. The pad
9008 bits are undefined.  Larger aggregates are returned in memory.  */
9012 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9014 for (i = 0; i < ub; i++)
9016 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9017 gen_rtx_REG (DImode, 28 + i),
9022 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9024 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9026 /* Aggregates 5 to 8 bytes in size are returned in general
9027 registers r28-r29 in the same manner as other non
9028 floating-point objects. The data is right-justified and
9029 zero-extended to 64 bits. This is opposite to the normal
9030 justification used on big endian targets and requires
9031 special treatment. */
9032 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9033 gen_rtx_REG (DImode, 28), const0_rtx);
9034 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9038 if ((INTEGRAL_TYPE_P (valtype)
9039 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
9040 || POINTER_TYPE_P (valtype))
9041 valmode = word_mode;
9043 valmode = TYPE_MODE (valtype);
9045 if (TREE_CODE (valtype) == REAL_TYPE
9046 && !AGGREGATE_TYPE_P (valtype)
9047 && TYPE_MODE (valtype) != TFmode
9048 && !TARGET_SOFT_FLOAT)
9049 return gen_rtx_REG (valmode, 32);
9051 return gen_rtx_REG (valmode, 28);
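/* Worked examples of the mapping implemented above (illustrative):

     int                                   -> %r28, promoted to word_mode
     float or double (!TARGET_SOFT_FLOAT)  -> fr4 (register 32)
     8-byte aggregate, TARGET_64BIT        -> PARALLEL with one DImode piece in %r28
     16-byte aggregate, TARGET_64BIT       -> PARALLEL with DImode pieces in %r28 and %r29  */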
9054 /* Return the location of a parameter that is passed in a register or NULL
9055 if the parameter has any component that is passed in memory.
9057 This is new code and will be pushed into the net sources after further testing.
9060 ??? We might want to restructure this so that it looks more like other ports.  */
9063 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9064 int named ATTRIBUTE_UNUSED)
9066 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9073 if (mode == VOIDmode)
9076 arg_size = FUNCTION_ARG_SIZE (mode, type);
9078 /* If this arg would be passed partially or totally on the stack, then
9079 this routine should return zero. pa_arg_partial_bytes will
9080 handle arguments which are split between regs and stack slots if
9081 the ABI mandates split arguments. */
9084 /* The 32-bit ABI does not split arguments. */
9085 if (cum->words + arg_size > max_arg_words)
9091 alignment = cum->words & 1;
9092 if (cum->words + alignment >= max_arg_words)
9096 /* The 32bit ABIs and the 64bit ABIs are rather different,
9097 particularly in their handling of FP registers. We might
9098 be able to cleverly share code between them, but I'm not
9099 going to bother in the hope that splitting them up results
9100 in code that is more easily understood. */
9104 /* Advance the base registers to their current locations.
9106 Remember, gprs grow towards smaller register numbers while
9107 fprs grow to higher register numbers. Also remember that
9108 although FP regs are 32-bit addressable, we pretend that
9109 the registers are 64-bits wide. */
9110 gpr_reg_base = 26 - cum->words;
9111 fpr_reg_base = 32 + cum->words;
9113 /* Arguments wider than one word and small aggregates need special treatment.  */
9117 || (type && (AGGREGATE_TYPE_P (type)
9118 || TREE_CODE (type) == COMPLEX_TYPE
9119 || TREE_CODE (type) == VECTOR_TYPE)))
9121 /* Double-extended precision (80-bit), quad-precision (128-bit)
9122 and aggregates including complex numbers are aligned on
9123 128-bit boundaries. The first eight 64-bit argument slots
9124 are associated one-to-one, with general registers r26
9125 through r19, and also with floating-point registers fr4
9126 through fr11. Arguments larger than one word are always
9127 passed in general registers.
9129 Using a PARALLEL with a word mode register results in left
9130 justified data on a big-endian target. */
9133 int i, offset = 0, ub = arg_size;
9135 /* Align the base register. */
9136 gpr_reg_base -= alignment;
9138 ub = MIN (ub, max_arg_words - cum->words - alignment);
9139 for (i = 0; i < ub; i++)
9141 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9142 gen_rtx_REG (DImode, gpr_reg_base),
9148 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9153 /* If the argument is larger than a word, then we know precisely
9154 which registers we must use. */
9168 /* Structures 5 to 8 bytes in size are passed in the general
9169 registers in the same manner as other non floating-point
9170 objects. The data is right-justified and zero-extended
9171 to 64 bits. This is opposite to the normal justification
9172 used on big endian targets and requires special treatment.
9173 We now define BLOCK_REG_PADDING to pad these objects.
9174 Aggregates, complex and vector types are passed in the same
9175 manner as structures. */
9177 || (type && (AGGREGATE_TYPE_P (type)
9178 || TREE_CODE (type) == COMPLEX_TYPE
9179 || TREE_CODE (type) == VECTOR_TYPE)))
9181 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9182 gen_rtx_REG (DImode, gpr_reg_base),
9184 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9189 /* We have a single word (32 bits). A simple computation
9190 will get us the register #s we need. */
9191 gpr_reg_base = 26 - cum->words;
9192 fpr_reg_base = 32 + 2 * cum->words;
9196 /* Determine if the argument needs to be passed in both general and
9197 floating point registers. */
9198 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9199 /* If we are doing soft-float with portable runtime, then there
9200 is no need to worry about FP regs. */
9201 && !TARGET_SOFT_FLOAT
9202 /* The parameter must be some kind of scalar float, else we just
9203 pass it in integer registers. */
9204 && GET_MODE_CLASS (mode) == MODE_FLOAT
9205 /* The target function must not have a prototype. */
9206 && cum->nargs_prototype <= 0
9207 /* libcalls do not need to pass items in both FP and general registers.  */
9209 && type != NULL_TREE
9210 /* All this hair applies to "outgoing" args only. This includes
9211 sibcall arguments set up with FUNCTION_INCOMING_ARG.
9213 /* Also pass outgoing floating arguments in both registers in indirect
9214 calls with the 32 bit ABI and the HP assembler since there is no
9215 way to specify the argument locations in static functions.
9220 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9226 gen_rtx_EXPR_LIST (VOIDmode,
9227 gen_rtx_REG (mode, fpr_reg_base),
9229 gen_rtx_EXPR_LIST (VOIDmode,
9230 gen_rtx_REG (mode, gpr_reg_base),
9235 /* See if we should pass this parameter in a general register. */
9236 if (TARGET_SOFT_FLOAT
9237 /* Indirect calls in the normal 32bit ABI require all arguments
9238 to be passed in general registers. */
9239 || (!TARGET_PORTABLE_RUNTIME
9243 /* If the parameter is not a scalar floating-point parameter,
9244 then it belongs in GPRs. */
9245 || GET_MODE_CLASS (mode) != MODE_FLOAT
9246 /* Structure with single SFmode field belongs in GPR. */
9247 || (type && AGGREGATE_TYPE_P (type)))
9248 retval = gen_rtx_REG (mode, gpr_reg_base);
9250 retval = gen_rtx_REG (mode, fpr_reg_base);
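/* Worked example for the 32-bit ABI (illustrative): argument words are
   assigned to %r26, %r25, %r24 and %r23 in that order, so a call such
   as f (int, long long) passes the int in %r26 and, after the
   double-word alignment handled above, the long long in the %r23/%r24
   pair, leaving %r25 unused.  */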
9256 /* If this arg would be passed totally in registers or totally on the stack,
9257 then this routine should return zero. */
9260 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9261 tree type, bool named ATTRIBUTE_UNUSED)
9263 unsigned int max_arg_words = 8;
9264 unsigned int offset = 0;
9269 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9272 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9273 /* Arg fits fully into registers. */
9275 else if (cum->words + offset >= max_arg_words)
9276 /* Arg fully on the stack. */
9280 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
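/* Worked example (illustrative): on TARGET_64BIT with cum->words == 6
   and a 24-byte aggregate (three words), offset stays 0, the argument
   neither fits fully in registers (6 + 3 > 8) nor starts on the stack
   (6 < 8), so the function returns (8 - 6) * UNITS_PER_WORD = 16 bytes
   passed in registers; the remaining 8 bytes go on the stack.  */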
9284 /* A get_unnamed_section callback for switching to the text section.
9286 This function is only used with SOM. Because we don't support
9287 named subspaces, we can only create a new subspace or switch back
9288 to the default text subspace. */
9291 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9293 gcc_assert (TARGET_SOM);
9296 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9298 /* We only want to emit a .nsubspa directive once at the
9299 start of the function. */
9300 cfun->machine->in_nsubspa = 1;
9302 /* Create a new subspace for the text. This provides
9303 better stub placement and one-only functions. */
9305 && DECL_ONE_ONLY (cfun->decl)
9306 && !DECL_WEAK (cfun->decl))
9308 output_section_asm_op ("\t.SPACE $TEXT$\n"
9309 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9310 "ACCESS=44,SORT=24,COMDAT");
9316 /* There isn't a current function or the body of the current
9317 function has been completed. So, we are changing to the
9318 text section to output debugging information. Thus, we
9319 need to forget that we are in the text section so that
9320 varasm.c will call us when text_section is selected again. */
9321 gcc_assert (!cfun || !cfun->machine
9322 || cfun->machine->in_nsubspa == 2);
9325 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9328 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9331 /* A get_unnamed_section callback for switching to comdat data
9332 sections. This function is only used with SOM. */
9335 som_output_comdat_data_section_asm_op (const void *data)
9338 output_section_asm_op (data);
9341 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9344 pa_som_asm_init_sections (void)
9347 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9349 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9350 is not being generated. */
9351 som_readonly_data_section
9352 = get_unnamed_section (0, output_section_asm_op,
9353 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9355 /* When secondary definitions are not supported, SOM makes readonly
9356 data one-only by creating a new $LIT$ subspace in $TEXT$ with the comdat flag.  */
9358 som_one_only_readonly_data_section
9359 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9361 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9362 "ACCESS=0x2c,SORT=16,COMDAT");
9365 /* When secondary definitions are not supported, SOM makes data one-only
9366 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9367 som_one_only_data_section
9368 = get_unnamed_section (SECTION_WRITE,
9369 som_output_comdat_data_section_asm_op,
9370 "\t.SPACE $PRIVATE$\n"
9371 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9372 "ACCESS=31,SORT=24,COMDAT");
9374 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9375 which reference data within the $TEXT$ space (for example constant
9376 strings in the $LIT$ subspace).
9378 The assemblers (GAS and HP as) both have problems with handling
9379 the difference of two symbols which is the other correct way to
9380 reference constant data during PIC code generation.
9382 So, there's no way to reference constant data which is in the
9383 $TEXT$ space during PIC generation. Instead place all constant
9384 data into the $PRIVATE$ subspace (this reduces sharing, but it
9385 works correctly). */
9386 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9388 /* We must not have a reference to an external symbol defined in a
9389 shared library in a readonly section, else the SOM linker will complain.
9392 So, we force exception information into the data section. */
9393 exception_section = data_section;
9396 /* On hpux10, the linker will give an error if we have a reference
9397 in the read-only data section to a symbol defined in a shared
9398 library. Therefore, expressions that might require a reloc can
9399 not be placed in the read-only data section. */
9402 pa_select_section (tree exp, int reloc,
9403 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9405 if (TREE_CODE (exp) == VAR_DECL
9406 && TREE_READONLY (exp)
9407 && !TREE_THIS_VOLATILE (exp)
9408 && DECL_INITIAL (exp)
9409 && (DECL_INITIAL (exp) == error_mark_node
9410 || TREE_CONSTANT (DECL_INITIAL (exp)))
9414 && DECL_ONE_ONLY (exp)
9415 && !DECL_WEAK (exp))
9416 return som_one_only_readonly_data_section;
9418 return readonly_data_section;
9420 else if (CONSTANT_CLASS_P (exp) && !reloc)
9421 return readonly_data_section;
9423 && TREE_CODE (exp) == VAR_DECL
9424 && DECL_ONE_ONLY (exp)
9425 && !DECL_WEAK (exp))
9426 return som_one_only_data_section;
9428 return data_section;
9432 pa_globalize_label (FILE *stream, const char *name)
9434 /* We only handle DATA objects here; functions are globalized in
9435 ASM_DECLARE_FUNCTION_NAME. */
9436 if (! FUNCTION_NAME_P (name))
9438 fputs ("\t.EXPORT ", stream);
9439 assemble_name (stream, name);
9440 fputs (",DATA\n", stream);
9444 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9447 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9448 int incoming ATTRIBUTE_UNUSED)
9450 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9453 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9456 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9458 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9459 PA64 ABI says that objects larger than 128 bits are returned in memory.
9460 Note, int_size_in_bytes can return -1 if the size of the object is
9461 variable or larger than the maximum value that can be expressed as
9462 a HOST_WIDE_INT. It can also return zero for an empty type. The
9463 simplest way to handle variable and empty types is to pass them in
9464 memory. This avoids problems in defining the boundaries of argument
9465 slots, allocating registers, etc. */
9466 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9467 || int_size_in_bytes (type) <= 0);
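/* Examples (illustrative): a 12-byte structure is returned in memory
   under the 32-bit SOM ABI (12 > 8) but in registers under the PA64
   ABI (12 <= 16); a variable-sized or empty type is always returned
   in memory.  */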
9470 /* Structure to hold declaration and name of external symbols that are
9471 emitted by GCC. We generate a vector of these symbols and output them
9472 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9473 This avoids putting out names that are never really used. */
9475 typedef struct extern_symbol GTY(())
{
  tree decl;
  const char *name;
} extern_symbol;
9481 /* Define gc'd vector type for extern_symbol. */
9482 DEF_VEC_O(extern_symbol);
9483 DEF_VEC_ALLOC_O(extern_symbol,gc);
9485 /* Vector of extern_symbol pointers. */
9486 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9488 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9489 /* Mark DECL (name NAME) as an external reference (assembler output
9490 file FILE). This saves the names to output at the end of the file
9491 if actually referenced. */
9494 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9496 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9498 gcc_assert (file == asm_out_file);
p->decl = decl;
p->name = name;
9503 /* Output text required at the end of an assembler file.
9504 This includes deferred plabels and .import directives for
9505 all external symbols that were actually referenced. */
9508 pa_hpux_file_end (void)
9513 if (!NO_DEFERRED_PROFILE_COUNTERS)
9514 output_deferred_profile_counters ();
9516 output_deferred_plabels ();
9518 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9520 tree decl = p->decl;
9522 if (!TREE_ASM_WRITTEN (decl)
9523 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9524 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9527 VEC_free (extern_symbol, gc, extern_symbols);