/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
					   enum machine_mode,
					   secondary_reload_info *);
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel GTY(())
{
  rtx internal_label;
  rtx symbol;
};

static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mpa_risc_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_mpa_risc_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (machine_function));
}
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || ldil_cint_p (ival)
	  || zdepi_cint_p (ival));
}
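
/* Editorial worked example (not part of the original source):
   cint_ok_for_move accepts 0x1fff (fits in 14 signed bits, so ldo
   works), 0x12345000 (low 11 bits clear and sign preserved, so ldil
   works), and 0x00ff0000 (a contiguous bit field, so zdepi works),
   but rejects 0x12345, which would need an ldil/ldo pair.  */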
/* Return truth value of whether OP can be used as an operand in a
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}
/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
	   && REG_P (op)
	   && REGNO (op) >= FIRST_PSEUDO_REGISTER
	   && reg_renumber [REGNO (op)] < 0)
	  || (GET_CODE (op) == MEM
	      && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
	      && !symbolic_memory_operand (op, VOIDmode)
	      && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
	      && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
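
/* Editorial worked example (not part of the original source): for
   ival = 0x12345000 the low 11 bits are zero and bit 31 is clear, so
   x == 0 and the test succeeds.  For ival = 0xabcd0000 on a 64-bit
   host the value would change sign when truncated to 32 bits, so
   x == 0x80000000 and the test fails.  */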
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
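
/* Editorial worked example (not part of the original source): for
   x = 0x00ff0000, lsb_mask = 0x00010000; (x >> 4) + lsb_mask =
   0x0010f000, and masking with ~(lsb_mask - 1) leaves t = 0x00100000,
   a power of two, so the test succeeds.  For a value such as
   0x00ff00f0 the masked sum is not a power of two and the test
   fails.  */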
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
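
/* Editorial worked example (not part of the original source): with
   mask = -16 (binary 1...10000), ~mask = 0xf; adding its lowest set
   bit gives 0x10, a power of two, so the test succeeds (pattern
   1....10....0).  For mask = 0xf0f0f0f0 the complement is not a
   single contiguous run of ones and the test fails.  */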
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
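
/* Editorial worked example (not part of the original source): depi can
   set one contiguous bit field, so ior_mask_p accepts 0x00ff0000
   (0x00ff0000 + 0x00010000 = 0x01000000, a power of two) and rejects
   the two-field mask 0x00ff00ff.  */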
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_const_mem (Pmode,
			 gen_rtx_LO_SUM (Pmode, tmp_reg,
					 gen_rtx_UNSPEC (Pmode,
							 gen_rtvec (1, orig),
							 UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
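
/* Editorial worked example (not part of the original source): for a
   MODE_INT reference to X + 0x4007 the mask is 0x3fff; since
   0x4007 & 0x3fff = 7 is below halfway, Y = 0x4000 (round down),
   Z = X + 0x4000, and the reference becomes memory (Z + 7), whose
   displacement fits in 14 bits and lets CSE share Z among nearby
   references.  */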

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floatint point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case SYMBOL_REF:
      return 4;
    default:
      return 4;
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      /* This is only safe up to the beginning of life analysis.  */
      gcc_assert (!no_new_pseudos);

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we loose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
				       ? SFmode : DFmode),
				      XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
							     0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
							     0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     operand0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Various sets are created during RTL generation which don't
	     have the REG_POINTER flag correctly set.  After the CSE pass,
	     instruction recognition can fail if we don't consistently
	     set this flag when performing register copies.  This should
	     also improve the opportunities for creating insns that use
	     unscaled indexing.  */
	  if (REG_P (operand0) && REG_P (operand1))
	    {
	      if (REG_POINTER (operand1)
		  && !REG_POINTER (operand0)
		  && !HARD_REGISTER_P (operand0))
		copy_reg_pointer (operand0, operand1);
	      else if (REG_POINTER (operand0)
		       && !REG_POINTER (operand1)
		       && !HARD_REGISTER_P (operand1))
		copy_reg_pointer (operand1, operand0);
	    }

	  /* When MEMs are broken out, the REG_POINTER flag doesn't
	     get set.  In some cases, we can set the REG_POINTER flag
	     from the declaration for the MEM.  */
	  if (REG_P (operand0)
	      && GET_CODE (operand1) == MEM
	      && !REG_POINTER (operand0))
	    {
	      tree decl = MEM_EXPR (operand1);

	      /* Set the register pointer flag and register alignment
		 if the declaration for this memory reference is a
		 pointer type.  Fortran indirect argument references
		 are ignored.  */
	      if (decl
		  && !(flag_argument_noalias > 1
		       && TREE_CODE (decl) == INDIRECT_REF
		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
		{
		  tree type;

		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
		     tree operand 1.  */
		  if (TREE_CODE (decl) == COMPONENT_REF)
		    decl = TREE_OPERAND (decl, 1);

		  type = TREE_TYPE (decl);
		  if (TREE_CODE (type) == ARRAY_TYPE)
		    type = get_inner_array_type (type);

		  if (POINTER_TYPE_P (type))
		    {
		      int align;

		      type = TREE_TYPE (type);
		      /* Using TYPE_ALIGN_OK is rather conservative as
			 only the ada frontend actually sets it.  */
		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
			       : BITS_PER_UNIT);
		      mark_reg_pointer (operand0, align);
		    }
		}
	    }

	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = force_mode (word_mode, scratch_reg);
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  gcc_assert (GET_CODE (const_part) == CONST_INT);

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  rtx const_mem = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
							mode, temp);
		  operands[1] = replace_equiv_address (const_mem, operands[1]);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  if (REG_P (operand0) && REG_P (operands[1]))
		    copy_reg_pointer (operand0, operands[1]);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = force_mode (word_mode, temp);
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (operand0, BITS_PER_UNIT);
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		mark_reg_pointer (temp, BITS_PER_UNIT);

	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);
	    }

	  return 1;
	}
      else if (pa_tls_referenced_p (operand1))
	{
	  rtx tmp = operand1;
	  rtx addend = NULL;

	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	    {
	      addend = XEXP (XEXP (tmp, 0), 1);
	      tmp = XEXP (XEXP (tmp, 0), 0);
	    }

	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
	  tmp = legitimize_tls_address (tmp);
	  if (addend)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, addend);
	      tmp = force_operand (tmp, operands[0]);
	    }
	  operands[1] = tmp;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || !cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx insn, temp;
	  rtx op1 = operand1;
	  HOST_WIDE_INT value = 0;
	  HOST_WIDE_INT insv = 0;
	  int insert = 0;

	  if (GET_CODE (operand1) == CONST_INT)
	    value = INTVAL (operand1);

	  if (TARGET_64BIT
	      && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT nval;

	      /* Extract the low order 32 bits of the value and sign extend.
		 If the new value is the same as the original value, we can
		 can use the original value as-is.  If the new value is
		 different, we use it and insert the most-significant 32-bits
		 of the original value into the final result.  */
	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (value != nval)
		{
#if HOST_BITS_PER_WIDE_INT > 32
		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
		  insert = 1;
		  value = nval;
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = scratch_reg ? scratch_reg : operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  /* We don't directly split DImode constants on 32-bit targets
	     because PLUS uses an 11-bit immediate and the insn sequence
	     generated is not as efficient as the one using HIGH/LO_SUM.  */
	  if (GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
	      && !insert)
	    {
	      /* Directly break constant into high and low parts.  This
		 provides better optimization opportunities because various
		 passes recognize constants split with PLUS but not LO_SUM.
		 We use a 14-bit signed low part except when the addition
		 of 0x4000 to the high part might change the sign of the
		 high part.  */
	      HOST_WIDE_INT low = value & 0x3fff;
	      HOST_WIDE_INT high = value & ~ 0x3fff;

	      if (low >= 0x2000)
		{
		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
		    high += 0x2000;
		  else
		    high += 0x4000;
		}

	      low = value - high;

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	    }
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, temp,
				      gen_rtx_HIGH (mode, operand1)));
	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	    }

	  insn = emit_move_insn (operands[0], operands[1]);

	  /* Now insert the most significant 32 bits of the value
	     into the register.  When we don't have a second register
	     available, it could take up to nine instructions to load
	     a 64-bit integer constant.  Prior to reload, we force
	     constants that would take more than three instructions
	     to load to the constant pool.  During and after reload,
	     we have to handle all possible values.  */
	  if (insert)
	    {
	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
		 register and the value to be inserted is outside the
		 range that can be loaded with three depdi instructions.  */
	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
		{
		  operand1 = GEN_INT (insv);

		  emit_insn (gen_rtx_SET (VOIDmode, temp,
					  gen_rtx_HIGH (mode, operand1)));
		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
		  emit_insn (gen_insv (operand0, GEN_INT (32),
				       const0_rtx, temp));
		}
	      else
		{
		  int len = 5, pos = 27;

		  /* Insert the bits using the depdi instruction.  */
		  while (pos >= 0)
		    {
		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
		      HOST_WIDE_INT sign = v5 < 0;

		      /* Left extend the insertion.  */
		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
		      while (pos > 0 && (insv & 1) == sign)
			{
			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
			  len += 1;
			  pos -= 1;
			}

		      emit_insn (gen_insv (operand0, GEN_INT (len),
					   GEN_INT (pos), GEN_INT (v5)));

		      len = pos > 0 && pos < 5 ? pos : 5;
		      pos -= len;
		    }
		}
	    }

	  set_unique_reg_note (insn, REG_EQUAL, op1);

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}
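
/* Editorial worked example (not part of the original source): the PLUS
   split above breaks a constant such as 0x12345678 into
   high = 0x12344000 and low = 0x1678, since 0x12345678 & 0x3fff =
   0x1678 is below 0x2000 and needs no sign adjustment; the pair is
   then emitted as a load of the high part followed by a 14-bit add,
   which later passes can still recognize as a PLUS.  */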

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (tree exp)
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	tree value;
	unsigned HOST_WIDE_INT ix;

	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
	  if (value)
	    reloc |= reloc_needed (value);
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space?
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
   will be true.  */

int
read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */

static const char *
singlemove_string (rtx *operands)
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      gcc_assert (GET_MODE (operands[1]) == SFmode);

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
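
/* Editor's examples (not in the original source), assuming 32-bit
   operands: 19 fits in 14 bits, so singlemove_string returns
   "ldi 19,%0"; 0x12345800 has its low 11 bits clear and becomes
   "ldil L'%1,%0"; 0x1e0 (15 << 5) is a shifted 5-bit value and
   qualifies for zdepi/depwi,z; anything else, e.g. 0x12345678, needs
   the two-insn ldil/ldo pair.  */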
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */

static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb - len + 1;
  op[2] = len;
}
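
/* Editor's sketch (not in the original source): tracing
   compute_zdepwi_operands for imm == 0x1c0.  The lowest set bit is at
   lsb == 6, leaving imm == 7; bit 4 of the shifted value is clear, so
   len == 4 and the results are op[0] == 7, op[1] == 31 - 6 - 4 + 1 == 22
   and op[2] == 4, i.e. a single zdepi/depwi,z whose net effect is the
   constant 7 << 6 == 0x1c0.  */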
/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */

void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  HOST_WIDE_INT lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb - len + 1;
  op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Improve this when this
     port is taught about the PA's general inc/dec addressing modes.
     (This was written by tege.  Chide him if it doesn't work.)  */
  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback.)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback.)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback.)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback.)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  rtx xoperands[4];
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			       xoperands);
	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	    }
	  else
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
			       xoperands);
	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
	    }
	}
    }
  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
			    operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
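
/* Editor's sketch (not in the original source): for a register-pair
   move where REGNO (dst) == REGNO (src) + 1, e.g. %r5:%r6 <- %r4:%r5,
   the code above emits the high word first ("copy %r5,%r6") and only
   then the low word ("copy %r4,%r5"), so the overlapping source half
   is read before it is clobbered.  */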
const char *
output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
	output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
	output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
	 arbitrary address here including pre/post increment/decrement.

	 So avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG);
  return addr;
}
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

const char *
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no move insns for integers wider than a word.  (Could
     use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("ldd,ma 8(%1),%3", operands);
	output_asm_insn ("ldd,ma 8(%1),%6", operands);
	output_asm_insn ("std,ma %3,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%2,.-12", operands);
	output_asm_insn ("std,ma %6,8(%0)", operands);

	/* Handle the residual.  There could be up to 15 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("ldd 0(%1),%6", operands);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %3,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%2,.-12", operands);
	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("ldw 0(%1),%6", operands);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%2,.-12", operands);
	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("ldb 0(%1),%6", operands);
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %6,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%2,.-12", operands);
	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  {
	    output_asm_insn ("ldb 0(%1),%3", operands);
	    output_asm_insn ("stb %3,0(%0)", operands);
	  }
	return "";

      default:
	gcc_unreachable ();
    }
}
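
/* Editor's sketch (not in the original source): for n_bytes == 11 and
   align == 4, output_block_move emits

	ldi 3,%2
	ldw,ma 4(%1),%3
	ldw,ma 4(%1),%6
	stw,ma %3,4(%0)
	addib,>= -8,%2,.-12
	stw,ma %6,4(%0)
	ldw 0(%1),%6
	stby,e %6,3(%0)

   i.e. a two-word unrolled loop copying 8 bytes per iteration,
   followed by a 3-byte residual handled with stby,e.  */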
/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as output_block_move, except that we
   count insns rather than emit them.  */

static int
compute_movmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than a word at a time because the PA
     has no move insns for integers wider than a word.  (Could
     use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns += 2;

      if ((n_bytes % align) != 0)
	n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
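
/* Editor's check (not in the original source): for the 11-byte,
   4-byte-aligned copy shown above, n_insns is 6 for the loop plus 2
   for the sub-word residual (11 % 8 == 3, 11 % 4 == 3), giving 8
   insns == 32 bytes, which matches the eight instructions emitted by
   output_block_move.  */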
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */

const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no move insns for integers wider than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %2,%1", operands);

	/* Clearing loop.  */
	output_asm_insn ("std,ma %%r0,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%1,.-4", operands);
	output_asm_insn ("std,ma %%r0,8(%0)", operands);

	/* Handle the residual.  There could be up to 15 bytes of
	   residual to clear!  */
	if (n_bytes % 16 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %2,%1", operands);

	/* Clearing loop.  */
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%1,.-4", operands);
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to clear!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %2,%1", operands);

	/* Clearing loop.  */
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%1,.-4", operands);
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %%r0,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %2,%1", operands);

	/* Clearing loop.  */
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%1,.-4", operands);
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  output_asm_insn ("stb %%r0,0(%0)", operands);

	return "";

      default:
	gcc_unreachable ();
    }
}
/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as output_block_clear, except that we
   count insns rather than emit them.  */

static int
compute_clrmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;

  /* We can't clear more than a word at a time because the PA
     has no move insns for integers wider than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic loop.  */
  n_insns = 4;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns++;

      if ((n_bytes % align) != 0)
	n_insns++;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
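
/* Editor's check (not in the original source): clearing 10 bytes with
   align == 4 costs n_insns == 4 for the loop plus 1 for the 2-byte
   residual (10 % 8 == 2, 10 % 4 == 2), i.e. 5 insns == 20 bytes.  */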
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */

const char *
output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < 32; ls0++)
	if ((mask & (1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < 32; ls1++)
	if ((mask & (1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < 32; ms0++)
	if ((mask & (1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == 32);

      if (ls1 == 32)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "{extru|extrw,u} %1,31,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 31 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "{depi|depwi} 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
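
/* Editor's examples (not in the original source): for mask 0x000000ff,
   ls0 == 8 and ls1 == 32, so output_and extracts the low 8 bits with
   "{extru|extrw,u} %1,31,8,%0"; for mask 0xfffffc00, ls0 == 0 and
   ls1 == 10, so it clears the low 10 bits with
   "{depi|depwi} 0,31,10,%0".  */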
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */

const char *
output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);

      if (ls1 == HOST_BITS_PER_WIDE_INT)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "extrd,u %1,63,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 63 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "depdi 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */

const char *
output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */

const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
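
/* Editor's example (not in the original source): for mask 0x3c
   (bits 2..5), bs0 == 2 and bs1 == 6, so output_64bit_ior emits
   "depdi -1,61,4,%0", depositing four one bits starting at bit 2.  */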
/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      /* We can't handle more than this many chars at once.  */
      int co, io;

      /* This loop will generate a number of characters.  It doesn't
	 matter if we break it in the middle of the loop, as long as
	 we don't put more than 4 source chars into partial_output.  */
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;

	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
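
/* Editor's example (not in the original source):
   output_ascii (file, "a\"b\n", 4) emits

	.STRING "a\"b\x0a"

   quoting the double quote and hex-escaping the non-printable byte;
   a long string would be split into several .STRING directives
   before the 243-character line budget is exceeded.  */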
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx insn;
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP, then we've found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction, bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}

      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp, next;

	  /* Ignore anything that isn't an INSN.  */
	  if (GET_CODE (insn) != INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (GET_CODE (next) == JUMP_INSN
		  || GET_CODE (next) == CALL_INSN
		  || GET_CODE (next) == CODE_LABEL)
		break;

	      /* As does another fcmp insn.  */
	      if (GET_CODE (next) == INSN
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next
	      && GET_CODE (next) == JUMP_INSN)
	    {
	      rtx pattern = PATTERN (next);

	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;
}
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments	(optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
   SP-56		arg word 5
   SP-52		arg word 4

   Fixed arguments	(must be allocated; may remain unused)

   SP-48		arg word 3
   SP-44		arg word 2
   SP-40		arg word 1
   SP-36		arg word 0

   Frame Marker

   SP-32		External Data Pointer (DP)
   SP-28		External sr4
   SP-24		External/stub RP (RP')
   SP-20		Current RP
   SP-16		Static Link
   SP-12		Clean up
   SP-8			Calling Stub RP (RP'')
   SP-4			Previous SP

   Top of Frame

   SP-0			Stack Pointer (points to next available address)

*/
/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

   Offset		Contents

   SP (FP')		Previous FP
   SP + 4		Alignment filler (sigh)
   SP + 8		Space for locals reserved here.
	:		    :
   SP + n		All call saved register used.
	:		    :
   SP + o		All call saved fp registers used.
	:		    :
   SP + p (SP')		points to next available address.
*/
/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows
   this.  */

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, tmpreg,
					      gen_rtx_PLUS (Pmode, basereg,
							    delta)),
				 REG_NOTES (insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	REG_NOTES (insn)
	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       gen_rtx_SET (VOIDmode,
					    gen_rtx_MEM (word_mode,
							 gen_rtx_PLUS (word_mode,
								       basereg,
								       delta)),
					    src),
			       REG_NOTES (insn));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about
   this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	REG_NOTES (insn)
	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       gen_rtx_SET (VOIDmode, tmpreg,
					    gen_rtx_PLUS (Pmode, basereg,
							  delta)),
			       REG_NOTES (insn));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
HOST_WIDE_INT
compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += current_function_outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!current_function_is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
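
/* Editor's sketch (not in the original source; the constants assume
   the 32-bit ABI with STARTING_FRAME_OFFSET == 8 and a 64-byte
   preferred stack boundary): for 40 bytes of locals, two callee-saved
   GRs, no FP saves, no outgoing args and a non-leaf function,
   compute_frame_size yields 40 + 8 + 2*4 + 32 == 88, rounded up to a
   128-byte frame.  */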
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (df_regs_ever_live_p (2))
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  remove_useless_addtr_insns (0);
}
void
hppa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx insn, tmpreg;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += STARTING_FRAME_OFFSET;

  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
    store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions, first
	     handles small (<8k) frames.  The second handles large (>=8k)
	     frames.  */
	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  if (VAL_14_BITS_P (actual_fsize))
	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
	  else
	    {
	      /* It is incorrect to store the saved frame pointer at *sp,
		 then increment sp (writes beyond the current stack boundary).

		 So instead use stwm to store at *sp and post-increment the
		 stack pointer as an atomic operation.  Then increment sp to
		 finish allocating the new frame.  */
	      HOST_WIDE_INT adjust1 = 8192 - 64;
	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;

	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			      adjust2, 1);
	    }

	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
	     we need to store the previous stack pointer (frame pointer)
	     into the frame marker on targets that use the HP unwind
	     library.  This allows the HP unwind library to be used to
	     unwind GCC frames.  However, we are not fully compatible
	     with the HP library because our frame layout differs from
	     that specified in the HP runtime specification.

	     We don't want a frame note on this instruction as the frame
	     marker moves during dynamic stack allocation.

	     This instruction also serves as a blockage to prevent
	     register spills from being scheduled before the stack
	     pointer is raised.  This is necessary as we store
	     registers using the frame pointer as a base register,
	     and the frame pointer is set before sp is raised.  */
	  if (TARGET_HPUX_UNWIND_LIBRARY)
	    {
	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
				       GEN_INT (TARGET_64BIT ? -8 : -4));

	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
			      frame_pointer_rtx);
	    }

	  emit_insn (gen_blockage ());
	}
      /* no frame pointer needed.  */
      else
	{
	  /* In some cases we can perform the first callee register save
	     and allocating the stack frame at the same time.   If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
	    merge_sp_adjust_with_store = 1;
	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
	  else
	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			    actual_fsize, 1);
	}
    }
  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  We put them
	 just before the general registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    store_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* If merge_sp_adjust_with_store is nonzero, then we can
		 optimize the first save.  */
	      if (merge_sp_adjust_with_store)
		{
		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
		  merge_sp_adjust_with_store = 0;
		}
	      else
		store_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?  For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }
  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
	  base = frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (df_regs_ever_live_p (i)
	      || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	    {
	      rtx addr, insn, reg;

	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (base, offset));
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SET (VOIDmode, mem, reg),
					     REG_NOTES (insn));
		    }
		  else
		    {
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SEQUENCE (VOIDmode, vec),
					     REG_NOTES (insn));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */

static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;

  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
	{
	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
	  src = gen_rtx_MEM (word_mode, tmpreg);
	}
      else
	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  emit_move_insn (dest, src);
}
/* Update the total code bytes output to the text section.  */

static void
update_total_code_bytes (int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      if (INSN_ADDRESSES_SET_P ())
	{
	  unsigned long old_total = total_code_bytes;

	  total_code_bytes += nbytes;

	  /* Be prepared to handle overflows.  */
	  if (old_total > total_code_bytes)
	    total_code_bytes = -1;
	}
      else
	total_code_bytes = -1;
    }
}
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx insn = get_last_insn ();

  last_address = 0;

  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    {
      fputs ("\tnop\n", file);
      last_address += 4;
    }

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (INSN_ADDRESSES_SET_P ())
    {
      insn = get_last_nonnote_insn ();
      last_address += INSN_ADDRESSES (INSN_UID (insn));
      if (INSN_P (insn))
	last_address += insn_default_length (insn);
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
void
hppa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (df_regs_ever_live_p (2) || current_function_calls_eh_return)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, FRAME_POINTER_REGNUM);
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    load_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && !call_used_regs[i])
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }
  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
4196 hppa_pic_save_rtx (void)
4198 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4201 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4202 #define NO_DEFERRED_PROFILE_COUNTERS 0
4206 /* Vector of funcdef numbers. */
4207 static VEC(int,heap) *funcdef_nos;
4209 /* Output deferred profile counters. */
4211 output_deferred_profile_counters (void)
4216 if (VEC_empty (int, funcdef_nos))
4219 switch_to_section (data_section);
4220 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4221 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4223 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4225 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4226 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4229 VEC_free (int, heap, funcdef_nos);
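/* Illustrative sketch (added for exposition; label spelling and
   directives are approximate, 32-bit case assumed): each funcdef
   number N pushed onto funcdef_nos above eventually becomes

	.data
	.align 4
   LP$N
	.word 0

   and hppa_profile_hook below passes the counter's address to
   _mcount in %r24 so the profiler has a word to update.  */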
4233 hppa_profile_hook (int label_no)
4235 /* We use SImode for the address of the function in both 32 and
4236 64-bit code to avoid having to provide DImode versions of the
4237 lcla2 and load_offset_label_address insn patterns. */
4238 rtx reg = gen_reg_rtx (SImode);
4239 rtx label_rtx = gen_label_rtx ();
4240 rtx begin_label_rtx, call_insn;
4241 char begin_label_name[16];
4243 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4245 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4248 emit_move_insn (arg_pointer_rtx,
4249 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4252 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4254 /* The address of the function is loaded into %r25 with an instruction-
4255 relative sequence that avoids the use of relocations. The sequence
4256 is split so that the load_offset_label_address instruction can
4257 occupy the delay slot of the call to _mcount. */
4259 emit_insn (gen_lcla2 (reg, label_rtx));
4261 emit_insn (gen_lcla1 (reg, label_rtx));
4263 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4264 reg, begin_label_rtx, label_rtx));
4266 #if !NO_DEFERRED_PROFILE_COUNTERS
4268 rtx count_label_rtx, addr, r24;
4269 char count_label_name[16];
4271 VEC_safe_push (int, heap, funcdef_nos, label_no);
4272 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4273 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4275 addr = force_reg (Pmode, count_label_rtx);
4276 r24 = gen_rtx_REG (Pmode, 24);
4277 emit_move_insn (r24, addr);
4280 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4281 gen_rtx_SYMBOL_REF (Pmode,
4283 GEN_INT (TARGET_64BIT ? 24 : 12)));
4285 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4290 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4291 gen_rtx_SYMBOL_REF (Pmode,
4293 GEN_INT (TARGET_64BIT ? 16 : 8)));
4297 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4298 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4300 /* Indicate the _mcount call cannot throw, nor will it execute a non-local goto. */
4302 REG_NOTES (call_insn)
4303 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4306 /* Fetch the return address for the frame COUNT steps up from
4307 the current frame, after the prologue. FRAMEADDR is the
4308 frame pointer of the COUNT frame.
4310 We want to ignore any export stub remnants here. To handle this,
4311 we examine the code at the return address, and if it is an export
4312 stub, we return a memory rtx for the stub return address stored
4315 The value returned is used in two different ways:
4317 1. To find a function's caller.
4319 2. To change the return address for a function.
4321 This function handles most instances of case 1; however, it will
4322 fail if there are two levels of stubs to execute on the return
4323 path. The only way I believe that can happen is if the return value
4324 needs a parameter relocation, which never happens for C code.
4326 This function handles most instances of case 2; however, it will
4327 fail if we did not originally have stub code on the return path
4328 but will need stub code on the new return path. This can happen if
4329 the caller & callee are both in the main program, but the new
4330 return location is in a shared library. */
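/* Illustration (hypothetical user code, added for exposition): this
   routine typically backs the RETURN_ADDR_RTX target macro, so a
   snippet such as

	void *pc = __builtin_return_address (0);

   ends up here, and the export stub pattern match below is what keeps
   the answer correct when the call crossed an HP-UX shared library
   boundary.  */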
4333 return_addr_rtx (int count, rtx frameaddr)
4343 rp = get_hard_reg_initial_val (Pmode, 2);
4345 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4348 saved_rp = gen_reg_rtx (Pmode);
4349 emit_move_insn (saved_rp, rp);
4351 /* Get pointer to the instruction stream. We have to mask out the
4352 privilege level from the two low order bits of the return address
4353 pointer here so that ins will point to the start of the first
4354 instruction that would have been executed if we returned. */
4355 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4356 label = gen_label_rtx ();
4358 /* Check the instruction stream at the normal return address for the
4361 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4362 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4363 0x00011820 | stub+16: mtsp r1,sr0
4364 0xe0400002 | stub+20: be,n 0(sr0,rp)
4366 If it is an export stub, then our return address is really in
4369 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4370 NULL_RTX, SImode, 1);
4371 emit_jump_insn (gen_bne (label));
4373 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4374 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4375 emit_jump_insn (gen_bne (label));
4377 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4378 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4379 emit_jump_insn (gen_bne (label));
4381 /* 0xe0400002 must be specified as -532676606 so that it won't be
4382 rejected as an invalid immediate operand on 64-bit hosts. */
4383 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4384 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);
4386 /* If there is no export stub then just use the value saved from
4387 the return pointer register. */
4389 emit_jump_insn (gen_bne (label));
4391 /* Here we know that our return address points to an export
4392 stub. We don't want to return the address of the export stub,
4393 but rather the return address of the export stub. That return
4394 address is stored at -24[frameaddr]. */
4396 emit_move_insn (saved_rp,
4398 memory_address (Pmode,
4399 plus_constant (frameaddr,
4406 /* This is only valid once reload has completed because it depends on
4407 knowing exactly how much (if any) frame there is and...
4409 It's only valid if there is no frame marker to de-allocate and...
4411 It's only valid if %r2 hasn't been saved into the caller's frame
4412 (we're not profiling and %r2 isn't live anywhere). */
4414 hppa_can_use_return_insn_p (void)
4416 return (reload_completed
4417 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4418 && ! df_regs_ever_live_p (2)
4419 && ! frame_pointer_needed);
4423 emit_bcond_fp (enum rtx_code code, rtx operand0)
4425 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4426 gen_rtx_IF_THEN_ELSE (VOIDmode,
4427 gen_rtx_fmt_ee (code,
4429 gen_rtx_REG (CCFPmode, 0),
4431 gen_rtx_LABEL_REF (VOIDmode, operand0),
4437 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4439 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4440 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4443 /* Adjust the cost of a scheduling dependency. Return the new cost of
4444 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4447 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4449 enum attr_type attr_type;
4451 /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4452 true dependencies, as they are described with bypasses now. */
4453 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4456 if (! recog_memoized (insn))
4459 attr_type = get_attr_type (insn);
4461 switch (REG_NOTE_KIND (link))
4464 /* Anti dependency; DEP_INSN reads a register that INSN writes some cycles later. */
4467 if (attr_type == TYPE_FPLOAD)
4469 rtx pat = PATTERN (insn);
4470 rtx dep_pat = PATTERN (dep_insn);
4471 if (GET_CODE (pat) == PARALLEL)
4473 /* This happens for the fldXs,mb patterns. */
4474 pat = XVECEXP (pat, 0, 0);
4476 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4477 /* If this happens, we have to extend this to schedule
4478 optimally. Return 0 for now. */
4481 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4483 if (! recog_memoized (dep_insn))
4485 switch (get_attr_type (dep_insn))
4492 case TYPE_FPSQRTSGL:
4493 case TYPE_FPSQRTDBL:
4494 /* A fpload can't be issued until one cycle before a
4495 preceding arithmetic operation has finished if
4496 the target of the fpload is any of the sources
4497 (or destination) of the arithmetic operation. */
4498 return insn_default_latency (dep_insn) - 1;
4505 else if (attr_type == TYPE_FPALU)
4507 rtx pat = PATTERN (insn);
4508 rtx dep_pat = PATTERN (dep_insn);
4509 if (GET_CODE (pat) == PARALLEL)
4511 /* This happens for the fldXs,mb patterns. */
4512 pat = XVECEXP (pat, 0, 0);
4514 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4515 /* If this happens, we have to extend this to schedule
4516 optimally. Return 0 for now. */
4519 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4521 if (! recog_memoized (dep_insn))
4523 switch (get_attr_type (dep_insn))
4527 case TYPE_FPSQRTSGL:
4528 case TYPE_FPSQRTDBL:
4529 /* An ALU flop can't be issued until two cycles before a
4530 preceding divide or sqrt operation has finished if
4531 the target of the ALU flop is any of the sources
4532 (or destination) of the divide or sqrt operation. */
4533 return insn_default_latency (dep_insn) - 2;
4541 /* For other anti dependencies, the cost is 0. */
4544 case REG_DEP_OUTPUT:
4545 /* Output dependency; DEP_INSN writes a register that INSN writes some cycles later. */
4547 if (attr_type == TYPE_FPLOAD)
4549 rtx pat = PATTERN (insn);
4550 rtx dep_pat = PATTERN (dep_insn);
4551 if (GET_CODE (pat) == PARALLEL)
4553 /* This happens for the fldXs,mb patterns. */
4554 pat = XVECEXP (pat, 0, 0);
4556 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4557 /* If this happens, we have to extend this to schedule
4558 optimally. Return 0 for now. */
4561 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4563 if (! recog_memoized (dep_insn))
4565 switch (get_attr_type (dep_insn))
4572 case TYPE_FPSQRTSGL:
4573 case TYPE_FPSQRTDBL:
4574 /* A fpload can't be issued until one cycle before a
4575 preceding arithmetic operation has finished if
4576 the target of the fpload is the destination of the
4577 arithmetic operation.
4579 Exception: For PA7100LC, PA7200 and PA7300, the cost
4580 is 3 cycles, unless they bundle together. We also
4581 pay the penalty if the second insn is an fpload. */
4582 return insn_default_latency (dep_insn) - 1;
4589 else if (attr_type == TYPE_FPALU)
4591 rtx pat = PATTERN (insn);
4592 rtx dep_pat = PATTERN (dep_insn);
4593 if (GET_CODE (pat) == PARALLEL)
4595 /* This happens for the fldXs,mb patterns. */
4596 pat = XVECEXP (pat, 0, 0);
4598 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4599 /* If this happens, we have to extend this to schedule
4600 optimally. Return 0 for now. */
4603 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4605 if (! recog_memoized (dep_insn))
4607 switch (get_attr_type (dep_insn))
4611 case TYPE_FPSQRTSGL:
4612 case TYPE_FPSQRTDBL:
4613 /* An ALU flop can't be issued until two cycles before a
4614 preceding divide or sqrt operation has finished if
4615 the target of the ALU flop is also the target of
4616 the divide or sqrt operation. */
4617 return insn_default_latency (dep_insn) - 2;
4625 /* For other output dependencies, the cost is 0. */
4633 /* Adjust scheduling priorities. We use this to try to keep addil
4634 and the next use of %r1 close together. */
4636 pa_adjust_priority (rtx insn, int priority)
4638 rtx set = single_set (insn);
4642 src = SET_SRC (set);
4643 dest = SET_DEST (set);
4644 if (GET_CODE (src) == LO_SUM
4645 && symbolic_operand (XEXP (src, 1), VOIDmode)
4646 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4649 else if (GET_CODE (src) == MEM
4650 && GET_CODE (XEXP (src, 0)) == LO_SUM
4651 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4652 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4655 else if (GET_CODE (dest) == MEM
4656 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4657 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4658 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4664 /* The 700 can only issue a single insn at a time.
4665 The 7XXX processors can issue two insns at a time.
4666 The 8000 can issue 4 insns at a time. */
4668 pa_issue_rate (void)
4672 case PROCESSOR_700: return 1;
4673 case PROCESSOR_7100: return 2;
4674 case PROCESSOR_7100LC: return 2;
4675 case PROCESSOR_7200: return 2;
4676 case PROCESSOR_7300: return 2;
4677 case PROCESSOR_8000: return 4;
4686 /* Return any length adjustment needed by INSN which already has its length
4687 computed as LENGTH. Return zero if no adjustment is necessary.
4689 For the PA: function calls, millicode calls, and backwards short
4690 conditional branches with unfilled delay slots need an adjustment by +1
4691 (to account for the NOP which will be inserted into the instruction stream).
4693 Also compute the length of an inline block move here as it is too
4694 complicated to express as a length attribute in pa.md. */
4696 pa_adjust_insn_length (rtx insn, int length)
4698 rtx pat = PATTERN (insn);
4700 /* Jumps inside switch tables which have unfilled delay slots need adjustment. */
4702 if (GET_CODE (insn) == JUMP_INSN
4703 && GET_CODE (pat) == PARALLEL
4704 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4706 /* Millicode insn with an unfilled delay slot. */
4707 else if (GET_CODE (insn) == INSN
4708 && GET_CODE (pat) != SEQUENCE
4709 && GET_CODE (pat) != USE
4710 && GET_CODE (pat) != CLOBBER
4711 && get_attr_type (insn) == TYPE_MILLI)
4713 /* Block move pattern. */
4714 else if (GET_CODE (insn) == INSN
4715 && GET_CODE (pat) == PARALLEL
4716 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4717 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4718 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4719 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4720 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4721 return compute_movmem_length (insn) - 4;
4722 /* Block clear pattern. */
4723 else if (GET_CODE (insn) == INSN
4724 && GET_CODE (pat) == PARALLEL
4725 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4726 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4727 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4728 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4729 return compute_clrmem_length (insn) - 4;
4730 /* Conditional branch with an unfilled delay slot. */
4731 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4733 /* Adjust a short backwards conditional with an unfilled delay slot. */
4734 if (GET_CODE (pat) == SET
4736 && ! forward_branch_p (insn))
4738 else if (GET_CODE (pat) == PARALLEL
4739 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4742 /* Adjust dbra insn with short backwards conditional branch with
4743 unfilled delay slot -- only for case where counter is in a
4744 general register. */
4745 else if (GET_CODE (pat) == PARALLEL
4746 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4747 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4748 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4750 && ! forward_branch_p (insn))
4758 /* Print operand X (an rtx) in assembler syntax to file FILE.
4759 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4760 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4763 print_operand (FILE *file, rtx x, int code)
4768 /* Output a 'nop' if there's nothing for the delay slot. */
4769 if (dbr_sequence_length () == 0)
4770 fputs ("\n\tnop", file);
4773 /* Output a nullification completer if there's nothing for the
4774 delay slot or nullification is requested. */
4775 if (dbr_sequence_length () == 0 ||
4777 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4781 /* Print out the second register name of a register pair.
4782 I.e., R (6) => 7. */
4783 fputs (reg_names[REGNO (x) + 1], file);
4786 /* A register or zero. */
4788 || (x == CONST0_RTX (DFmode))
4789 || (x == CONST0_RTX (SFmode)))
4791 fputs ("%r0", file);
4797 /* A register or zero (floating point). */
4799 || (x == CONST0_RTX (DFmode))
4800 || (x == CONST0_RTX (SFmode)))
4802 fputs ("%fr0", file);
4811 xoperands[0] = XEXP (XEXP (x, 0), 0);
4812 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4813 output_global_address (file, xoperands[1], 0);
4814 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4818 case 'C': /* Plain (C)ondition */
4820 switch (GET_CODE (x))
4823 fputs ("=", file); break;
4825 fputs ("<>", file); break;
4827 fputs (">", file); break;
4829 fputs (">=", file); break;
4831 fputs (">>=", file); break;
4833 fputs (">>", file); break;
4835 fputs ("<", file); break;
4837 fputs ("<=", file); break;
4839 fputs ("<<=", file); break;
4841 fputs ("<<", file); break;
4846 case 'N': /* Condition, (N)egated */
4847 switch (GET_CODE (x))
4850 fputs ("<>", file); break;
4852 fputs ("=", file); break;
4854 fputs ("<=", file); break;
4856 fputs ("<", file); break;
4858 fputs ("<<", file); break;
4860 fputs ("<<=", file); break;
4862 fputs (">=", file); break;
4864 fputs (">", file); break;
4866 fputs (">>", file); break;
4868 fputs (">>=", file); break;
4873 /* For floating point comparisons. Note that the output
4874 predicates are the complement of the desired mode. The
4875 conditions for GT, GE, LT, LE and LTGT cause an invalid
4876 operation exception if the result is unordered and this
4877 exception is enabled in the floating-point status register. */
4879 switch (GET_CODE (x))
4882 fputs ("!=", file); break;
4884 fputs ("=", file); break;
4886 fputs ("!>", file); break;
4888 fputs ("!>=", file); break;
4890 fputs ("!<", file); break;
4892 fputs ("!<=", file); break;
4894 fputs ("!<>", file); break;
4896 fputs ("!?<=", file); break;
4898 fputs ("!?<", file); break;
4900 fputs ("!?>=", file); break;
4902 fputs ("!?>", file); break;
4904 fputs ("!?=", file); break;
4906 fputs ("!?", file); break;
4908 fputs ("?", file); break;
4913 case 'S': /* Condition, operands are (S)wapped. */
4914 switch (GET_CODE (x))
4917 fputs ("=", file); break;
4919 fputs ("<>", file); break;
4921 fputs ("<", file); break;
4923 fputs ("<=", file); break;
4925 fputs ("<<=", file); break;
4927 fputs ("<<", file); break;
4929 fputs (">", file); break;
4931 fputs (">=", file); break;
4933 fputs (">>=", file); break;
4935 fputs (">>", file); break;
4940 case 'B': /* Condition, (B)oth swapped and negate. */
4941 switch (GET_CODE (x))
4944 fputs ("<>", file); break;
4946 fputs ("=", file); break;
4948 fputs (">=", file); break;
4950 fputs (">", file); break;
4952 fputs (">>", file); break;
4954 fputs (">>=", file); break;
4956 fputs ("<=", file); break;
4958 fputs ("<", file); break;
4960 fputs ("<<", file); break;
4962 fputs ("<<=", file); break;
4968 gcc_assert (GET_CODE (x) == CONST_INT);
4969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4972 gcc_assert (GET_CODE (x) == CONST_INT);
4973 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4976 gcc_assert (GET_CODE (x) == CONST_INT);
4977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4980 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
4981 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4984 gcc_assert (GET_CODE (x) == CONST_INT);
4985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4988 gcc_assert (GET_CODE (x) == CONST_INT);
4989 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4992 if (GET_CODE (x) == CONST_INT)
4997 switch (GET_CODE (XEXP (x, 0)))
5001 if (ASSEMBLER_DIALECT == 0)
5002 fputs ("s,mb", file);
5004 fputs (",mb", file);
5008 if (ASSEMBLER_DIALECT == 0)
5009 fputs ("s,ma", file);
5011 fputs (",ma", file);
5014 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5017 if (ASSEMBLER_DIALECT == 0)
5020 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5021 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5023 if (ASSEMBLER_DIALECT == 0)
5024 fputs ("x,s", file);
5028 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5032 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5038 output_global_address (file, x, 0);
5041 output_global_address (file, x, 1);
5043 case 0: /* Don't do anything special */
5048 compute_zdepwi_operands (INTVAL (x), op);
5049 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5055 compute_zdepdi_operands (INTVAL (x), op);
5056 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5060 /* We can get here from a .vtable_inherit due to our
5061 CONSTANT_ADDRESS_P rejecting perfectly good constant addresses. */
5067 if (GET_CODE (x) == REG)
5069 fputs (reg_names [REGNO (x)], file);
5070 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5076 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5077 && (REGNO (x) & 1) == 0)
5080 else if (GET_CODE (x) == MEM)
5082 int size = GET_MODE_SIZE (GET_MODE (x));
5083 rtx base = NULL_RTX;
5084 switch (GET_CODE (XEXP (x, 0)))
5088 base = XEXP (XEXP (x, 0), 0);
5089 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5093 base = XEXP (XEXP (x, 0), 0);
5094 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5097 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5098 fprintf (file, "%s(%s)",
5099 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5100 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5101 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5102 fprintf (file, "%s(%s)",
5103 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5104 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5105 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5106 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5108 /* Because the REG_POINTER flag can get lost during reload,
5109 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5110 index and base registers in the combined move patterns. */
5111 rtx base = XEXP (XEXP (x, 0), 1);
5112 rtx index = XEXP (XEXP (x, 0), 0);
5114 fprintf (file, "%s(%s)",
5115 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5118 output_address (XEXP (x, 0));
5121 output_address (XEXP (x, 0));
5126 output_addr_const (file, x);
5129 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5132 output_global_address (FILE *file, rtx x, int round_constant)
5135 /* Imagine (high (const (plus ...))). */
5136 if (GET_CODE (x) == HIGH)
5139 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5140 output_addr_const (file, x);
5141 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5143 output_addr_const (file, x);
5144 fputs ("-$global$", file);
5146 else if (GET_CODE (x) == CONST)
5148 const char *sep = "";
5149 int offset = 0; /* assembler wants -$global$ at end */
5150 rtx base = NULL_RTX;
5152 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5155 base = XEXP (XEXP (x, 0), 0);
5156 output_addr_const (file, base);
5159 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5165 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5168 base = XEXP (XEXP (x, 0), 1);
5169 output_addr_const (file, base);
5172 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5178 /* How bogus. The compiler is apparently responsible for
5179 rounding the constant if it uses an LR field selector.
5181 The linker and/or assembler seem a better place since
5182 they have to do this kind of thing already.
5184 If we fail to do this, HP's optimizing linker may eliminate
5185 an addil, but not update the ldw/stw/ldo instruction that
5186 uses the result of the addil. */
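/* Worked example of the rounding below (added for exposition): an
   offset of 0x1234 becomes (0x1234 + 0x1000) & ~0x1fff == 0x2000,
   i.e. offsets are rounded to the nearest 8k boundary, mirroring the
   LR field selector's behavior.  */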
5188 offset = ((offset + 0x1000) & ~0x1fff);
5190 switch (GET_CODE (XEXP (x, 0)))
5203 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5211 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5212 fputs ("-$global$", file);
5214 fprintf (file, "%s%d", sep, offset);
5217 output_addr_const (file, x);
5220 /* Output boilerplate text to appear at the beginning of the file.
5221 There are several possible versions. */
5222 #define aputs(x) fputs(x, asm_out_file)
5224 pa_file_start_level (void)
5227 aputs ("\t.LEVEL 2.0w\n");
5228 else if (TARGET_PA_20)
5229 aputs ("\t.LEVEL 2.0\n");
5230 else if (TARGET_PA_11)
5231 aputs ("\t.LEVEL 1.1\n");
5233 aputs ("\t.LEVEL 1.0\n");
5237 pa_file_start_space (int sortspace)
5239 aputs ("\t.SPACE $PRIVATE$");
5242 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5243 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5244 "\n\t.SPACE $TEXT$");
5247 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5248 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5252 pa_file_start_file (int want_version)
5254 if (write_symbols != NO_DEBUG)
5256 output_file_directive (asm_out_file, main_input_filename);
5258 aputs ("\t.version\t\"01.01\"\n");
5263 pa_file_start_mcount (const char *aswhat)
5266 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5270 pa_elf_file_start (void)
5272 pa_file_start_level ();
5273 pa_file_start_mcount ("ENTRY");
5274 pa_file_start_file (0);
5278 pa_som_file_start (void)
5280 pa_file_start_level ();
5281 pa_file_start_space (0);
5282 aputs ("\t.IMPORT $global$,DATA\n"
5283 "\t.IMPORT $$dyncall,MILLICODE\n");
5284 pa_file_start_mcount ("CODE");
5285 pa_file_start_file (0);
5289 pa_linux_file_start (void)
5291 pa_file_start_file (1);
5292 pa_file_start_level ();
5293 pa_file_start_mcount ("CODE");
5297 pa_hpux64_gas_file_start (void)
5299 pa_file_start_level ();
5300 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5302 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5304 pa_file_start_file (1);
5308 pa_hpux64_hpas_file_start (void)
5310 pa_file_start_level ();
5311 pa_file_start_space (1);
5312 pa_file_start_mcount ("CODE");
5313 pa_file_start_file (0);
5317 /* Search the deferred plabel list for SYMBOL and return its internal
5318 label. If an entry for SYMBOL is not found, a new entry is created. */
5321 get_deferred_plabel (rtx symbol)
5323 const char *fname = XSTR (symbol, 0);
5326 /* See if we have already put this function on the list of deferred
5327 plabels. This list is generally small, so a linear search is not
5328 too ugly. If it proves too slow, replace it with something faster. */
5329 for (i = 0; i < n_deferred_plabels; i++)
5330 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5333 /* If the deferred plabel list is empty, or this entry was not found
5334 on the list, create a new entry on the list. */
5335 if (deferred_plabels == NULL || i == n_deferred_plabels)
5339 if (deferred_plabels == 0)
5340 deferred_plabels = (struct deferred_plabel *)
5341 ggc_alloc (sizeof (struct deferred_plabel));
5343 deferred_plabels = (struct deferred_plabel *)
5344 ggc_realloc (deferred_plabels,
5345 ((n_deferred_plabels + 1)
5346 * sizeof (struct deferred_plabel)));
5348 i = n_deferred_plabels++;
5349 deferred_plabels[i].internal_label = gen_label_rtx ();
5350 deferred_plabels[i].symbol = symbol;
5352 /* Gross. We have just implicitly taken the address of this
5353 function. Mark it in the same manner as assemble_name. */
5354 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5356 mark_referenced (id);
5359 return deferred_plabels[i].internal_label;
5363 output_deferred_plabels (void)
5367 /* If we have some deferred plabels, then we need to switch into the
5368 data or readonly data section, and align it to a 4-byte (8-byte in 64-bit mode) boundary
5369 before outputting the deferred plabels. */
5370 if (n_deferred_plabels)
5372 switch_to_section (flag_pic ? data_section : readonly_data_section);
5373 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5376 /* Now output the deferred plabels. */
5377 for (i = 0; i < n_deferred_plabels; i++)
5379 targetm.asm_out.internal_label (asm_out_file, "L",
5380 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5381 assemble_integer (deferred_plabels[i].symbol,
5382 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5386 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5387 /* Initialize optabs to point to HPUX long double emulation routines. */
5389 pa_hpux_init_libfuncs (void)
5391 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5392 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5393 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5394 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5395 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5396 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5397 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5398 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5399 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5401 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5402 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5403 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5404 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5405 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5406 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5407 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5409 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5410 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5411 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5412 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5414 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5415 ? "__U_Qfcnvfxt_quad_to_sgl"
5416 : "_U_Qfcnvfxt_quad_to_sgl");
5417 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5418 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5419 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5421 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5422 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5423 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5424 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
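/* For illustration (hypothetical user code; assumes long double maps
   to TFmode as it does on this HP-UX configuration): with the
   mappings above, a quad-precision multiply such as

	long double f (long double a, long double b) { return a * b; }

   is compiled into a call to _U_Qfmpy rather than inline FP code.  */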
5428 /* HP's millicode routines mean something special to the assembler.
5429 Keep track of which ones we have used. */
5431 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5432 static void import_milli (enum millicodes);
5433 static char imported[(int) end1000];
5434 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5435 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5436 #define MILLI_START 10
5439 import_milli (enum millicodes code)
5441 char str[sizeof (import_string)];
5443 if (!imported[(int) code])
5445 imported[(int) code] = 1;
5446 strcpy (str, import_string);
5447 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5448 output_asm_insn (str, 0);
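/* Example of the mechanism above (added for exposition):
   import_milli (mulI) copies "mulI" over the "...." placeholder at
   offset MILLI_START, emitting

	.IMPORT $$mulI,MILLICODE

   exactly once per output file thanks to the imported[] cache.  */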
5452 /* The register constraints have put the operands and return value in
5453 the proper registers. */
5456 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5458 import_milli (mulI);
5459 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5462 /* Emit the rtl for doing a division by a constant. */
5464 /* Do magic division millicodes exist for this value? */
5465 const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5467 /* We'll use an array to keep track of the magic millicodes and
5468 whether or not we've used them already. [n][0] is signed, [n][1] is unsigned. */
5471 static int div_milli[16][2];
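/* Reading magic_milli[] above (added for exposition): the divisors
   with a nonzero entry -- 3, 5, 6, 7, 9, 10, 12, 14 and 15 -- have
   dedicated $$divI_<n>/$$divU_<n> millicode routines; all other
   divisors fall through to the generic $$divI/$$divU calls emitted
   by output_div_insn below.  */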
5474 emit_hpdiv_const (rtx *operands, int unsignedp)
5476 if (GET_CODE (operands[2]) == CONST_INT
5477 && INTVAL (operands[2]) > 0
5478 && INTVAL (operands[2]) < 16
5479 && magic_milli[INTVAL (operands[2])])
5481 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5483 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5487 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5488 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5490 gen_rtx_REG (SImode, 26),
5492 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5493 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5494 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5495 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5496 gen_rtx_CLOBBER (VOIDmode, ret))));
5497 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
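/* A sketch of the millicode calling convention as encoded above
   (inferred from this function, not a normative ABI statement): the
   dividend is passed in %r26 and the divisor in %r25, the quotient
   comes back in %r29, and the millicode return pointer is %r31
   (%r2 in the 64-bit runtime) -- hence the CLOBBERs in the PARALLEL.  */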
5504 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5508 /* If the divisor is a constant, try to use one of the special millicode routines. */
5510 if (GET_CODE (operands[0]) == CONST_INT)
5512 static char buf[100];
5513 divisor = INTVAL (operands[0]);
5514 if (!div_milli[divisor][unsignedp])
5516 div_milli[divisor][unsignedp] = 1;
5518 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5520 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5524 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5525 INTVAL (operands[0]));
5526 return output_millicode_call (insn,
5527 gen_rtx_SYMBOL_REF (SImode, buf));
5531 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5532 INTVAL (operands[0]));
5533 return output_millicode_call (insn,
5534 gen_rtx_SYMBOL_REF (SImode, buf));
5537 /* Divisor isn't a special constant. */
5542 import_milli (divU);
5543 return output_millicode_call (insn,
5544 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5548 import_milli (divI);
5549 return output_millicode_call (insn,
5550 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5555 /* Output a $$rem millicode to do mod. */
5558 output_mod_insn (int unsignedp, rtx insn)
5562 import_milli (remU);
5563 return output_millicode_call (insn,
5564 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5568 import_milli (remI);
5569 return output_millicode_call (insn,
5570 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5575 output_arg_descriptor (rtx call_insn)
5577 const char *arg_regs[4];
5578 enum machine_mode arg_mode;
5580 int i, output_flag = 0;
5583 /* We neither need nor want argument location descriptors for the
5584 64-bit runtime environment or the ELF32 environment. */
5585 if (TARGET_64BIT || TARGET_ELF32)
5588 for (i = 0; i < 4; i++)
5591 /* Specify explicitly that no argument relocations should take place
5592 if using the portable runtime calling conventions. */
5593 if (TARGET_PORTABLE_RUNTIME)
5595 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5600 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5601 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5602 link; link = XEXP (link, 1))
5604 rtx use = XEXP (link, 0);
5606 if (! (GET_CODE (use) == USE
5607 && GET_CODE (XEXP (use, 0)) == REG
5608 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5611 arg_mode = GET_MODE (XEXP (use, 0));
5612 regno = REGNO (XEXP (use, 0));
5613 if (regno >= 23 && regno <= 26)
5615 arg_regs[26 - regno] = "GR";
5616 if (arg_mode == DImode)
5617 arg_regs[25 - regno] = "GR";
5619 else if (regno >= 32 && regno <= 39)
5621 if (arg_mode == SFmode)
5622 arg_regs[(regno - 32) / 2] = "FR";
5625 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5626 arg_regs[(regno - 34) / 2] = "FR";
5627 arg_regs[(regno - 34) / 2 + 1] = "FU";
5629 arg_regs[(regno - 34) / 2] = "FU";
5630 arg_regs[(regno - 34) / 2 + 1] = "FR";
5635 fputs ("\t.CALL ", asm_out_file);
5636 for (i = 0; i < 4; i++)
5641 fputc (',', asm_out_file);
5642 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5645 fputc ('\n', asm_out_file);
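/* Example of the directive emitted above (hypothetical source, added
   for exposition): for a call to a function taking two plain ints,
   argument registers %r26 and %r25 are both general registers, so we
   would emit

	.CALL ARGW0=GR,ARGW1=GR

   which tells HP's linker that no GR<->FR argument relocation stubs
   are required for this call site.  */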
5648 static enum reg_class
5649 pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
5650 enum machine_mode mode, secondary_reload_info *sri)
5652 int is_symbolic, regno;
5654 /* Handle the easy stuff first. */
5655 if (class == R1_REGS)
5661 if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5667 /* If we have something like (mem (mem (...))), we can safely assume the
5668 inner MEM will end up in a general register after reloading, so there's
5669 no need for a secondary reload. */
5670 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5673 /* Trying to load a constant into a FP register during PIC code
5674 generation requires %r1 as a scratch register. */
5676 && (mode == SImode || mode == DImode)
5677 && FP_REG_CLASS_P (class)
5678 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5680 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5681 : CODE_FOR_reload_indi_r1);
5685 /* Profiling showed the PA port spends about 1.3% of its compilation
5686 time in true_regnum from calls inside pa_secondary_reload_class. */
5687 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5688 regno = true_regnum (x);
5690 /* Handle out of range displacement for integer mode loads/stores of FP registers. */
5692 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5693 && GET_MODE_CLASS (mode) == MODE_INT
5694 && FP_REG_CLASS_P (class))
5695 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5697 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5701 /* A SAR<->FP register copy requires a secondary register (GPR) as
5702 well as secondary memory. */
5703 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5704 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5705 || (class == SHIFT_REGS
5706 && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5708 sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5712 /* Secondary reloads of symbolic operands require %r1 as a scratch
5713 register when we're generating PIC code and the operand isn't readonly. */
5715 if (GET_CODE (x) == HIGH)
5718 /* Profiling has shown GCC spends about 2.6% of its compilation
5719 time in symbolic_operand from calls inside pa_secondary_reload_class.
5720 So, we use an inline copy to avoid useless work. */
5721 switch (GET_CODE (x))
5726 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5733 is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5734 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5735 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5736 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5743 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5745 gcc_assert (mode == SImode || mode == DImode);
5746 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5747 : CODE_FOR_reload_indi_r1);
5753 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5754 by invisible reference. As a GCC extension, we also pass anything
5755 with a zero or variable size by reference.
5757 The 64-bit runtime does not describe passing any types by invisible
5758 reference. The internals of GCC can't currently handle passing
5759 empty structures, and zero or variable length arrays when they are
5760 not passed entirely on the stack or by reference. Thus, as a GCC
5761 extension, we pass these types by reference. The HP compiler doesn't
5762 support these types, so hopefully there shouldn't be any compatibility
5763 issues. This may have to be revisited when HP releases a C99 compiler
5764 or updates the ABI. */
5767 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5768 enum machine_mode mode, tree type,
5769 bool named ATTRIBUTE_UNUSED)
5774 size = int_size_in_bytes (type);
5776 size = GET_MODE_SIZE (mode);
5781 return size <= 0 || size > 8;
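/* Two quick examples of the rule above (illustrative user types): a
   12-byte struct has size > 8 and is passed by invisible reference in
   the 32-bit runtime, while a plain 4-byte int is passed by value; a
   zero-sized struct also goes by reference, per the GCC extension
   described above.  */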
5785 function_arg_padding (enum machine_mode mode, tree type)
5788 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5790 /* Return none if justification is not required. */
5792 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5793 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5796 /* The directions set here are ignored when a BLKmode argument larger
5797 than a word is placed in a register. Different code is used for
5798 the stack and registers. This makes it difficult to have a
5799 consistent data representation for both the stack and registers.
5800 For both runtimes, the justification and padding for arguments on
5801 the stack and in registers should be identical. */
5803 /* The 64-bit runtime specifies left justification for aggregates. */
5806 /* The 32-bit runtime architecture specifies right justification.
5807 When the argument is passed on the stack, the argument is padded
5808 with garbage on the left. The HP compiler pads with zeros. */
5812 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5819 /* Do what is necessary for `va_start'. We look at the current function
5820 to determine if stdargs or varargs is used and fill in an initial
5821 va_list. A pointer to this constructor is returned. */
5824 hppa_builtin_saveregs (void)
5827 tree fntype = TREE_TYPE (current_function_decl);
5828 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5829 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5830 != void_type_node)))
5831 ? UNITS_PER_WORD : 0);
5834 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5836 offset = current_function_arg_offset_rtx;
5842 /* Adjust for varargs/stdarg differences. */
5844 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5846 offset = current_function_arg_offset_rtx;
5848 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5849 from the incoming arg pointer and growing to larger addresses. */
5850 for (i = 26, off = -64; i >= 19; i--, off += 8)
5851 emit_move_insn (gen_rtx_MEM (word_mode,
5852 plus_constant (arg_pointer_rtx, off)),
5853 gen_rtx_REG (word_mode, i));
5855 /* The incoming args pointer points just beyond the flushback area;
5856 normally this is not a serious concern. However, when we are doing
5857 varargs/stdargs we want to make the arg pointer point to the start
5858 of the incoming argument area. */
5859 emit_move_insn (virtual_incoming_args_rtx,
5860 plus_constant (arg_pointer_rtx, -64));
5862 /* Now return a pointer to the first anonymous argument. */
5863 return copy_to_reg (expand_binop (Pmode, add_optab,
5864 virtual_incoming_args_rtx,
5865 offset, 0, 0, OPTAB_LIB_WIDEN));
5868 /* Store general registers on the stack. */
5869 dest = gen_rtx_MEM (BLKmode,
5870 plus_constant (current_function_internal_arg_pointer,
5872 set_mem_alias_set (dest, get_varargs_alias_set ());
5873 set_mem_align (dest, BITS_PER_WORD);
5874 move_block_from_reg (23, dest, 4);
5876 /* move_block_from_reg will emit code to store the argument registers
5877 individually as scalar stores.
5879 However, other insns may later load from the same addresses for
5880 a structure load (passing a struct to a varargs routine).
5882 The alias code assumes that such aliasing can never happen, so we
5883 have to keep memory referencing insns from moving up beyond the
5884 last argument register store. So we emit a blockage insn here. */
5885 emit_insn (gen_blockage ());
5887 return copy_to_reg (expand_binop (Pmode, add_optab,
5888 current_function_internal_arg_pointer,
5889 offset, 0, 0, OPTAB_LIB_WIDEN));
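/* Hypothetical user code that exercises the routine above (added for
   exposition):

	#include <stdarg.h>

	int
	sum (int n, ...)
	{
	  va_list ap;
	  int i, s = 0;

	  va_start (ap, n);	/+ expands through hppa_va_start below +/
	  for (i = 0; i < n; i++)
	    s += va_arg (ap, int);
	  va_end (ap);
	  return s;
	}

   (the inner comment is written with '+' in place of '*' so this
   block nests).  The va_start expansion calls
   expand_builtin_saveregs, which in turn calls hppa_builtin_saveregs
   to spill the anonymous argument registers to the stack.  */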
5893 hppa_va_start (tree valist, rtx nextarg)
5895 nextarg = expand_builtin_saveregs ();
5896 std_expand_builtin_va_start (valist, nextarg);
5900 hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5904 /* Args grow upward. We can use the generic routines. */
5905 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5907 else /* !TARGET_64BIT */
5909 tree ptr = build_pointer_type (type);
5912 unsigned int size, ofs;
5915 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5919 ptr = build_pointer_type (type);
5921 size = int_size_in_bytes (type);
5922 valist_type = TREE_TYPE (valist);
5924 /* Args grow down. Not handled by generic routines. */
5926 u = fold_convert (sizetype, size_in_bytes (type));
5927 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5928 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
5930 /* Copied from va-pa.h, but we probably don't need to align to
5931 word size, since we generate and preserve that invariant. */
5932 u = size_int (size > 4 ? -8 : -4);
5933 t = fold_convert (sizetype, t);
5934 t = build2 (BIT_AND_EXPR, sizetype, t, u);
5935 t = fold_convert (valist_type, t);
5937 t = build2 (MODIFY_EXPR, valist_type, valist, t);
5939 ofs = (8 - size) % 4;
5943 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
5946 t = fold_convert (ptr, t);
5947 t = build_va_arg_indirect_ref (t);
5950 t = build_va_arg_indirect_ref (t);
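/* Worked example of the 32-bit va_arg arithmetic above (assuming the
   usual type sizes): for an int (size 4) the code computes
   valist = (valist - 4) & -4 and fetches at that address, since
   ofs = (8 - 4) % 4 == 0; for a short (size 2) it computes
   valist = (valist - 2) & -4 and fetches at valist + 2, leaving the
   value right-justified in its word, consistent with
   function_arg_padding above.  */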
5956 /* True if MODE is valid for the target. By "valid", we mean able to
5957 be manipulated in non-trivial ways. In particular, this means all
5958 the arithmetic is supported.
5960 Currently, TImode is not valid as the HP 64-bit runtime documentation
5961 doesn't document the alignment and calling conventions for this type.
5962 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5963 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
5966 pa_scalar_mode_supported_p (enum machine_mode mode)
5968 int precision = GET_MODE_PRECISION (mode);
5970 switch (GET_MODE_CLASS (mode))
5972 case MODE_PARTIAL_INT:
5974 if (precision == CHAR_TYPE_SIZE)
5976 if (precision == SHORT_TYPE_SIZE)
5978 if (precision == INT_TYPE_SIZE)
5980 if (precision == LONG_TYPE_SIZE)
5982 if (precision == LONG_LONG_TYPE_SIZE)
5987 if (precision == FLOAT_TYPE_SIZE)
5989 if (precision == DOUBLE_TYPE_SIZE)
5991 if (precision == LONG_DOUBLE_TYPE_SIZE)
5995 case MODE_DECIMAL_FLOAT:
6003 /* This routine handles all the normal conditional branch sequences we
6004 might need to generate. It handles compare immediate vs compare
6005 register, nullification of delay slots, varying length branches,
6006 negated branches, and all combinations of the above. It returns the
6007 output appropriate to emit the branch corresponding to all given parameters. */
6011 output_cbranch (rtx *operands, int negated, rtx insn)
6013 static char buf[100];
6015 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6016 int length = get_attr_length (insn);
6019 /* A conditional branch to the following instruction (e.g. the delay slot)
6020 is asking for a disaster. This can happen when not optimizing and
6021 when jump optimization fails.
6023 While it is usually safe to emit nothing, this can fail if the
6024 preceding instruction is a nullified branch with an empty delay
6025 slot and the same branch target as this branch. We could check
6026 for this but jump optimization should eliminate nop jumps. It
6027 is always safe to emit a nop. */
6028 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6031 /* The doubleword form of the cmpib instruction doesn't have the LEU
6032 and GTU conditions while the cmpb instruction does. Since we accept
6033 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6034 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6035 operands[2] = gen_rtx_REG (DImode, 0);
6036 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6037 operands[1] = gen_rtx_REG (DImode, 0);
6039 /* If this is a long branch with its delay slot unfilled, set `nullify'
6040 as it can nullify the delay slot and save a nop. */
6041 if (length == 8 && dbr_sequence_length () == 0)
6044 /* If this is a short forward conditional branch which did not get
6045 its delay slot filled, the delay slot can still be nullified. */
6046 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6047 nullify = forward_branch_p (insn);
6049 /* A forward branch over a single nullified insn can be done with a
6050 comclr instruction. This avoids a single cycle penalty due to
6051 mis-predicted branch if we fall through (branch not taken). */
6053 && next_real_insn (insn) != 0
6054 && get_attr_length (next_real_insn (insn)) == 4
6055 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6061 /* All short conditional branches except backwards with an unfilled
6065 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6067 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6068 if (GET_MODE (operands[1]) == DImode)
6071 strcat (buf, "%B3");
6073 strcat (buf, "%S3");
6075 strcat (buf, " %2,%r1,%%r0");
6077 strcat (buf, ",n %2,%r1,%0");
6079 strcat (buf, " %2,%r1,%0");
6082 /* All long conditionals. Note a short backward branch with an
6083 unfilled delay slot is treated just like a long backward branch
6084 with an unfilled delay slot. */
6086 /* Handle weird backwards branch with a filled delay slot
6087 which is nullified. */
6088 if (dbr_sequence_length () != 0
6089 && ! forward_branch_p (insn)
6092 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6093 if (GET_MODE (operands[1]) == DImode)
6096 strcat (buf, "%S3");
6098 strcat (buf, "%B3");
6099 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6101 /* Handle short backwards branch with an unfilled delay slot.
6102 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6103 taken and untaken branches. */
6104 else if (dbr_sequence_length () == 0
6105 && ! forward_branch_p (insn)
6106 && INSN_ADDRESSES_SET_P ()
6107 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6108 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6110 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6111 if (GET_MODE (operands[1]) == DImode)
6114 strcat (buf, "%B3 %2,%r1,%0%#");
6116 strcat (buf, "%S3 %2,%r1,%0%#");
6120 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6121 if (GET_MODE (operands[1]) == DImode)
6124 strcat (buf, "%S3");
6126 strcat (buf, "%B3");
6128 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6130 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6135 /* The reversed conditional branch must branch over one additional
6136 instruction if the delay slot is filled and needs to be extracted
6137 by output_lbranch. If the delay slot is empty or this is a
6138 nullified forward branch, the instruction after the reversed
6139 condition branch must be nullified. */
6140 if (dbr_sequence_length () == 0
6141 || (nullify && forward_branch_p (insn)))
6145 operands[4] = GEN_INT (length);
6150 operands[4] = GEN_INT (length + 4);
6153 /* Create a reversed conditional branch which branches around
6154 the following insns. */
6155 if (GET_MODE (operands[1]) != DImode)
6161 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6164 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6170 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6173 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6182 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6185 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6191 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6194 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6198 output_asm_insn (buf, operands);
6199 return output_lbranch (operands[0], insn, xdelay);
6204 /* This routine handles output of long unconditional branches that
6205 exceed the maximum range of a simple branch instruction. Since
6206 we don't have a register available for the branch, we save register
6207 %r1 in the frame marker, load the branch destination DEST into %r1,
6208 execute the branch, and restore %r1 in the delay slot of the branch.
6210 Since long branches may have an insn in the delay slot and the
6211 delay slot is used to restore %r1, we in general need to extract
6212 this insn and execute it before the branch. However, to facilitate
6213 use of this function by conditional branches, we also provide an
6214 option to not extract the delay insn so that it will be emitted
6215 after the long branch. So, if there is an insn in the delay slot,
6216 it is extracted if XDELAY is nonzero.
6218 The lengths of the various long-branch sequences are 20, 16 and 24
6219 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
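/* For example (a sketch matching the 16-byte non-PIC count above):
   when we have a frame of our own, the 32-bit non-PIC sequence
   emitted below is roughly

	stw %r1,-12(%r30)	; save %r1 in the "Clean Up" slot
	ldil L'target,%r1	; load the destination into %r1
	be R'target(%sr4,%r1)	; branch through %r1
	ldw -12(%r30),%r1	; delay slot: restore %r1

   i.e. four 4-byte instructions.  */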
6222 output_lbranch (rtx dest, rtx insn, int xdelay)
6226 xoperands[0] = dest;
6228 /* First, free up the delay slot. */
6229 if (xdelay && dbr_sequence_length () != 0)
6231 /* We can't handle a jump in the delay slot. */
6232 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6234 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6237 /* Now delete the delay insn. */
6238 SET_INSN_DELETED (NEXT_INSN (insn));
6241 /* Output an insn to save %r1. The runtime documentation doesn't
6242 specify whether the "Clean Up" slot in the caller's frame can
6243 be clobbered by the callee. It isn't copied by HP's builtin
6244 alloca, so this suggests that it can be clobbered if necessary.
6245 The "Static Link" location is copied by HP builtin alloca, so
6246 we avoid using it. Using the cleanup slot might be a problem
6247 if we have to interoperate with languages that pass cleanup
6248 information. However, it should be possible to handle these
6249 situations with GCC's asm feature.
6251 The "Current RP" slot is reserved for the called procedure, so
6252 we try to use it when we don't have a frame of our own. It's
6253 rather unlikely that we won't have a frame when we need to emit a very long branch.
6256 Really the way to go long term is a register scavenger; go to
6257 the target of the jump and find a register which we can use
6258 as a scratch to hold the value in %r1. Then, we wouldn't have
6259 to free up the delay slot or clobber a slot that may be needed
6260 for other purposes. */
6263 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6264 /* Use the return pointer slot in the frame marker. */
6265 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6267 /* Use the slot at -40 in the frame marker since HP builtin
6268 alloca doesn't copy it. */
6269 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6273 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6274 /* Use the return pointer slot in the frame marker. */
6275 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6277 /* Use the "Clean Up" slot in the frame marker. In GCC,
6278 the only other use of this location is for copying a
6279 floating point double argument from a floating-point
6280 register to two general registers. The copy is done
6281 as an "atomic" operation when outputting a call, so it
6282 won't interfere with our using the location here. */
6283 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6286 if (TARGET_PORTABLE_RUNTIME)
6288 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6289 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6290 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6294 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6295 if (TARGET_SOM || !TARGET_GAS)
6297 xoperands[1] = gen_label_rtx ();
6298 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6299 targetm.asm_out.internal_label (asm_out_file, "L",
6300 CODE_LABEL_NUMBER (xoperands[1]));
6301 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6305 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6306 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6308 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6311 /* Now output a very long branch to the original target. */
6312 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6314 /* Now restore the value of %r1 in the delay slot. */
6317 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6318 return "ldd -16(%%r30),%%r1";
6320 return "ldd -40(%%r30),%%r1";
6324 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6325 return "ldw -20(%%r30),%%r1";
6327 return "ldw -12(%%r30),%%r1";
6331 /* This routine handles all the branch-on-bit conditional branch sequences we
6332 might need to generate. It handles nullification of delay slots,
6333 varying length branches, negated branches and all combinations of the
6334 above. It returns the appropriate output template to emit the branch. */
6337 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6339 static char buf[100];
6341 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6342 int length = get_attr_length (insn);
6345 /* A conditional branch to the following instruction (e.g. the delay slot) is
6346 asking for a disaster. I do not think this can happen as this pattern
6347 is only used when optimizing; jump optimization should eliminate the
6348 jump. But be prepared just in case. */
6350 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6353 /* If this is a long branch with its delay slot unfilled, set `nullify'
6354 as it can nullify the delay slot and save a nop. */
6355 if (length == 8 && dbr_sequence_length () == 0)
6358 /* If this is a short forward conditional branch which did not get
6359 its delay slot filled, the delay slot can still be nullified. */
6360 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6361 nullify = forward_branch_p (insn);
6363 /* A forward branch over a single nullified insn can be done with an
6364 extrs instruction. This avoids a single cycle penalty due to
6365 mis-predicted branch if we fall through (branch not taken). */
6368 && next_real_insn (insn) != 0
6369 && get_attr_length (next_real_insn (insn)) == 4
6370 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6377 /* All short conditional branches except backwards with an unfilled
6381 strcpy (buf, "{extrs,|extrw,s,}");
6383 strcpy (buf, "bb,");
6384 if (useskip && GET_MODE (operands[0]) == DImode)
6385 strcpy (buf, "extrd,s,*");
6386 else if (GET_MODE (operands[0]) == DImode)
6387 strcpy (buf, "bb,*");
6388 if ((which == 0 && negated)
6389 || (which == 1 && ! negated))
6394 strcat (buf, " %0,%1,1,%%r0");
6395 else if (nullify && negated)
6396 strcat (buf, ",n %0,%1,%3");
6397 else if (nullify && ! negated)
6398 strcat (buf, ",n %0,%1,%2");
6399 else if (! nullify && negated)
6400 strcat (buf, "%0,%1,%3");
6401 else if (! nullify && ! negated)
6402 strcat (buf, " %0,%1,%2");
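/* As a hypothetical example, with operands[0] = %r26 and operands[1] = 5,
   a nullifying branch-on-bit built from the template above would
   assemble to something like:

	bb,>=,n %r26,5,L$0042

   i.e. test bit 5 of %r26 (bits are numbered from the most significant
   end on the PA) and branch with the delay slot nullified.  The
   register, bit position, condition and label are all invented.  */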
6405 /* All long conditionals. Note a short backward branch with an
6406 unfilled delay slot is treated just like a long backward branch
6407 with an unfilled delay slot. */
6409 /* Handle weird backwards branch with a filled delay slot
6410 which is nullified. */
6411 if (dbr_sequence_length () != 0
6412 && ! forward_branch_p (insn)
6415 strcpy (buf, "bb,");
6416 if (GET_MODE (operands[0]) == DImode)
6418 if ((which == 0 && negated)
6419 || (which == 1 && ! negated))
6424 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6426 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6428 /* Handle short backwards branch with an unfilled delay slot.
6429 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6430 taken and untaken branches. */
6431 else if (dbr_sequence_length () == 0
6432 && ! forward_branch_p (insn)
6433 && INSN_ADDRESSES_SET_P ()
6434 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6435 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6437 strcpy (buf, "bb,");
6438 if (GET_MODE (operands[0]) == DImode)
6440 if ((which == 0 && negated)
6441 || (which == 1 && ! negated))
6446 strcat (buf, " %0,%1,%3%#");
6448 strcat (buf, " %0,%1,%2%#");
6452 if (GET_MODE (operands[0]) == DImode)
6453 strcpy (buf, "extrd,s,*");
6455 strcpy (buf, "{extrs,|extrw,s,}");
6456 if ((which == 0 && negated)
6457 || (which == 1 && ! negated))
6461 if (nullify && negated)
6462 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6463 else if (nullify && ! negated)
6464 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6466 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6468 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6473 /* The reversed conditional branch must branch over one additional
6474 instruction if the delay slot is filled and needs to be extracted
6475 by output_lbranch. If the delay slot is empty or this is a
6476 nullified forward branch, the instruction after the reversed
6477 conditional branch must be nullified. */
6478 if (dbr_sequence_length () == 0
6479 || (nullify && forward_branch_p (insn)))
6483 operands[4] = GEN_INT (length);
6488 operands[4] = GEN_INT (length + 4);
6491 if (GET_MODE (operands[0]) == DImode)
6492 strcpy (buf, "bb,*");
6494 strcpy (buf, "bb,");
6495 if ((which == 0 && negated)
6496 || (which == 1 && !negated))
6501 strcat (buf, ",n %0,%1,.+%4");
6503 strcat (buf, " %0,%1,.+%4");
6504 output_asm_insn (buf, operands);
6505 return output_lbranch (negated ? operands[3] : operands[2],
6511 /* This routine handles all the branch-on-variable-bit conditional branch
6512 sequences we might need to generate. It handles nullification of delay
6513 slots, varying length branches, negated branches and all combinations
6514 of the above. It returns the appropriate output template to emit the branch. */
6518 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6520 static char buf[100];
6522 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6523 int length = get_attr_length (insn);
6526 /* A conditional branch to the following instruction (e.g. the delay slot) is
6527 asking for a disaster. I do not think this can happen as this pattern
6528 is only used when optimizing; jump optimization should eliminate the
6529 jump. But be prepared just in case. */
6531 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6534 /* If this is a long branch with its delay slot unfilled, set `nullify'
6535 as it can nullify the delay slot and save a nop. */
6536 if (length == 8 && dbr_sequence_length () == 0)
6539 /* If this is a short forward conditional branch which did not get
6540 its delay slot filled, the delay slot can still be nullified. */
6541 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6542 nullify = forward_branch_p (insn);
6544 /* A forward branch over a single nullified insn can be done with an
6545 extrs instruction. This avoids a single cycle penalty due to a
6546 mis-predicted branch if we fall through (branch not taken). */
6549 && next_real_insn (insn) != 0
6550 && get_attr_length (next_real_insn (insn)) == 4
6551 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6558 /* All short conditional branches except backwards with an unfilled delay slot. */
6562 strcpy (buf, "{vextrs,|extrw,s,}");
6564 strcpy (buf, "{bvb,|bb,}");
6565 if (useskip && GET_MODE (operands[0]) == DImode)
6566 strcpy (buf, "extrd,s,*");
6567 else if (GET_MODE (operands[0]) == DImode)
6568 strcpy (buf, "bb,*");
6569 if ((which == 0 && negated)
6570 || (which == 1 && ! negated))
6575 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6576 else if (nullify && negated)
6577 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6578 else if (nullify && ! negated)
6579 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6580 else if (! nullify && negated)
6581 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6582 else if (! nullify && ! negated)
6583 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
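/* Note the "{old|new}" braces in the templates above: when the insn is
   output, the text before the "|" is used for the PA 1.x assembler
   dialect and the text after it for PA 2.0.  So "{bvb,|bb,}" emits a
   bvb instruction, which implicitly tests the bit selected by %sar, on
   older targets, and an explicit "bb," with a %sar operand on newer
   ones.  */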
6586 /* All long conditionals. Note a short backward branch with an
6587 unfilled delay slot is treated just like a long backward branch
6588 with an unfilled delay slot. */
6590 /* Handle weird backwards branch with a filled delay slot
6591 which is nullified. */
6592 if (dbr_sequence_length () != 0
6593 && ! forward_branch_p (insn)
6596 strcpy (buf, "{bvb,|bb,}");
6597 if (GET_MODE (operands[0]) == DImode)
6599 if ((which == 0 && negated)
6600 || (which == 1 && ! negated))
6605 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6607 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6609 /* Handle short backwards branch with an unfilled delay slot.
6610 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6611 taken and untaken branches. */
6612 else if (dbr_sequence_length () == 0
6613 && ! forward_branch_p (insn)
6614 && INSN_ADDRESSES_SET_P ()
6615 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6616 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6618 strcpy (buf, "{bvb,|bb,}");
6619 if (GET_MODE (operands[0]) == DImode)
6621 if ((which == 0 && negated)
6622 || (which == 1 && ! negated))
6627 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6629 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6633 strcpy (buf, "{vextrs,|extrw,s,}");
6634 if (GET_MODE (operands[0]) == DImode)
6635 strcpy (buf, "extrd,s,*");
6636 if ((which == 0 && negated)
6637 || (which == 1 && ! negated))
6641 if (nullify && negated)
6642 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6643 else if (nullify && ! negated)
6644 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6646 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6648 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6653 /* The reversed conditional branch must branch over one additional
6654 instruction if the delay slot is filled and needs to be extracted
6655 by output_lbranch. If the delay slot is empty or this is a
6656 nullified forward branch, the instruction after the reversed
6657 conditional branch must be nullified. */
6658 if (dbr_sequence_length () == 0
6659 || (nullify && forward_branch_p (insn)))
6663 operands[4] = GEN_INT (length);
6668 operands[4] = GEN_INT (length + 4);
6671 if (GET_MODE (operands[0]) == DImode)
6672 strcpy (buf, "bb,*");
6674 strcpy (buf, "{bvb,|bb,}");
6675 if ((which == 0 && negated)
6676 || (which == 1 && !negated))
6681 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6683 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6684 output_asm_insn (buf, operands);
6685 return output_lbranch (negated ? operands[3] : operands[2],
6691 /* Return the output template for emitting a dbra type insn.
6693 Note it may perform some output operations on its own before
6694 returning the final output string. */
6696 output_dbra (rtx *operands, rtx insn, int which_alternative)
6698 int length = get_attr_length (insn);
6700 /* A conditional branch to the following instruction (e.g. the delay slot) is
6701 asking for a disaster. Be prepared! */
6703 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6705 if (which_alternative == 0)
6706 return "ldo %1(%0),%0";
6707 else if (which_alternative == 1)
6709 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6710 output_asm_insn ("ldw -16(%%r30),%4", operands);
6711 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6712 return "{fldws|fldw} -16(%%r30),%0";
6716 output_asm_insn ("ldw %0,%4", operands);
6717 return "ldo %1(%4),%4\n\tstw %4,%0";
6721 if (which_alternative == 0)
6723 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6726 /* If this is a long branch with its delay slot unfilled, set `nullify'
6727 as it can nullify the delay slot and save a nop. */
6728 if (length == 8 && dbr_sequence_length () == 0)
6731 /* If this is a short forward conditional branch which did not get
6732 its delay slot filled, the delay slot can still be nullified. */
6733 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6734 nullify = forward_branch_p (insn);
6740 return "addib,%C2,n %1,%0,%3";
6742 return "addib,%C2 %1,%0,%3";
6745 /* Handle weird backwards branch with a filled delay slot
6746 which is nullified. */
6747 if (dbr_sequence_length () != 0
6748 && ! forward_branch_p (insn)
6750 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6751 /* Handle short backwards branch with an unfilled delay slot.
6752 Using an addb;nop rather than addi;bl saves 1 cycle for both
6753 taken and untaken branches. */
6754 else if (dbr_sequence_length () == 0
6755 && ! forward_branch_p (insn)
6756 && INSN_ADDRESSES_SET_P ()
6757 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6758 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6759 return "addib,%C2 %1,%0,%3%#";
6761 /* Handle normal cases. */
6763 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6765 return "addi,%N2 %1,%0,%0\n\tb %3";
6768 /* The reversed conditional branch must branch over one additional
6769 instruction if the delay slot is filled and needs to be extracted
6770 by output_lbranch. If the delay slot is empty or this is a
6771 nullified forward branch, the instruction after the reversed
6772 conditional branch must be nullified. */
6773 if (dbr_sequence_length () == 0
6774 || (nullify && forward_branch_p (insn)))
6778 operands[4] = GEN_INT (length);
6783 operands[4] = GEN_INT (length + 4);
6787 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6789 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6791 return output_lbranch (operands[3], insn, xdelay);
6795 /* Deal with gross reload from FP register case. */
6796 else if (which_alternative == 1)
6798 /* Move loop counter from FP register to MEM, then into a GR,
6799 increment the GR, store the GR into MEM, and finally reload
6800 the FP register from MEM from within the branch's delay slot. */
6801 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6803 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6805 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6806 else if (length == 28)
6807 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6810 operands[5] = GEN_INT (length - 16);
6811 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6812 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6813 return output_lbranch (operands[3], insn, 0);
6816 /* Deal with gross reload from memory case. */
6819 /* Reload loop counter from memory; the store back to memory
6820 happens in the branch's delay slot. */
6821 output_asm_insn ("ldw %0,%4", operands);
6823 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6824 else if (length == 16)
6825 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6828 operands[5] = GEN_INT (length - 4);
6829 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6830 return output_lbranch (operands[3], insn, 0);
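/* For illustration, the common register-alternative case above produces
   a classic decrement-and-branch loop such as (operands invented):

	addib,> -1,%r3,L$loop	; %r3 += -1, branch to L$loop if > 0

   The FP and memory alternatives exist only because reload may have
   placed the loop counter in an FP register or a stack slot; they
   bounce the counter through memory to perform the add and test.  */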
6835 /* Return the output template for emitting a movb type insn.
6837 Note it may perform some output operations on its own before
6838 returning the final output string. */
6840 output_movb (rtx *operands, rtx insn, int which_alternative,
6841 int reverse_comparison)
6843 int length = get_attr_length (insn);
6845 /* A conditional branch to the following instruction (e.g. the delay slot) is
6846 asking for a disaster. Be prepared! */
6848 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6850 if (which_alternative == 0)
6851 return "copy %1,%0";
6852 else if (which_alternative == 1)
6854 output_asm_insn ("stw %1,-16(%%r30)", operands);
6855 return "{fldws|fldw} -16(%%r30),%0";
6857 else if (which_alternative == 2)
6863 /* Support the second variant. */
6864 if (reverse_comparison)
6865 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6867 if (which_alternative == 0)
6869 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6872 /* If this is a long branch with its delay slot unfilled, set `nullify'
6873 as it can nullify the delay slot and save a nop. */
6874 if (length == 8 && dbr_sequence_length () == 0)
6877 /* If this is a short forward conditional branch which did not get
6878 its delay slot filled, the delay slot can still be nullified. */
6879 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6880 nullify = forward_branch_p (insn);
6886 return "movb,%C2,n %1,%0,%3";
6888 return "movb,%C2 %1,%0,%3";
6891 /* Handle weird backwards branch with a filled delay slot
6892 which is nullified. */
6893 if (dbr_sequence_length () != 0
6894 && ! forward_branch_p (insn)
6896 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6898 /* Handle short backwards branch with an unfilled delay slot.
6899 Using a movb;nop rather than or;bl saves 1 cycle for both
6900 taken and untaken branches. */
6901 else if (dbr_sequence_length () == 0
6902 && ! forward_branch_p (insn)
6903 && INSN_ADDRESSES_SET_P ()
6904 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6905 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6906 return "movb,%C2 %1,%0,%3%#";
6907 /* Handle normal cases. */
6909 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6911 return "or,%N2 %1,%%r0,%0\n\tb %3";
6914 /* The reversed conditional branch must branch over one additional
6915 instruction if the delay slot is filled and needs to be extracted
6916 by output_lbranch. If the delay slot is empty or this is a
6917 nullified forward branch, the instruction after the reversed
6918 conditional branch must be nullified. */
6919 if (dbr_sequence_length () == 0
6920 || (nullify && forward_branch_p (insn)))
6924 operands[4] = GEN_INT (length);
6929 operands[4] = GEN_INT (length + 4);
6933 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
6935 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
6937 return output_lbranch (operands[3], insn, xdelay);
6940 /* Deal with gross reload for FP destination register case. */
6941 else if (which_alternative == 1)
6943 /* Move source register to MEM, perform the branch test, then
6944 finally load the FP register from MEM from within the branch's delay slot. */
6946 output_asm_insn ("stw %1,-16(%%r30)", operands);
6948 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6949 else if (length == 16)
6950 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6953 operands[4] = GEN_INT (length - 4);
6954 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
6955 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6956 return output_lbranch (operands[3], insn, 0);
6959 /* Deal with gross reload from memory case. */
6960 else if (which_alternative == 2)
6962 /* Reload loop counter from memory; the store back to memory
6963 happens in the branch's delay slot. */
6965 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6966 else if (length == 12)
6967 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6970 operands[4] = GEN_INT (length);
6971 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
6973 return output_lbranch (operands[3], insn, 0);
6976 /* Handle SAR as a destination. */
6980 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6981 else if (length == 12)
6982 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6985 operands[4] = GEN_INT (length);
6986 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
6988 return output_lbranch (operands[3], insn, 0);
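/* The SAR case needs separate handling because the shift amount
   register cannot be the destination of a movb.  The sequences above
   therefore do the copy with an explicit mtsar and pair it with a
   compare-and-branch (comb/cmpb) or compare-clear (comclr/cmpclr) plus
   branch instead of a single combined instruction.  */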
6993 /* Copy any FP arguments in INSN into integer registers. */
6995 copy_fp_args (rtx insn)
7000 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7002 int arg_mode, regno;
7003 rtx use = XEXP (link, 0);
7005 if (! (GET_CODE (use) == USE
7006 && GET_CODE (XEXP (use, 0)) == REG
7007 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7010 arg_mode = GET_MODE (XEXP (use, 0));
7011 regno = REGNO (XEXP (use, 0));
7013 /* Is it a floating point register? */
7014 if (regno >= 32 && regno <= 39)
7016 /* Copy the FP register into an integer register via memory. */
7017 if (arg_mode == SFmode)
7019 xoperands[0] = XEXP (use, 0);
7020 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7021 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7022 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7026 xoperands[0] = XEXP (use, 0);
7027 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7028 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7029 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7030 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7036 /* Compute length of the FP argument copy sequence for INSN. */
7038 length_fp_args (rtx insn)
7043 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7045 int arg_mode, regno;
7046 rtx use = XEXP (link, 0);
7048 if (! (GET_CODE (use) == USE
7049 && GET_CODE (XEXP (use, 0)) == REG
7050 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7053 arg_mode = GET_MODE (XEXP (use, 0));
7054 regno = REGNO (XEXP (use, 0));
7056 /* Is it a floating point register? */
7057 if (regno >= 32 && regno <= 39)
7059 if (arg_mode == SFmode) length += 8; else length += 12; /* fstw+ldw vs. fstd+2 ldws */
7069 /* Return the attribute length for the millicode call instruction INSN.
7070 The length must match the code generated by output_millicode_call.
7071 We include the delay slot in the returned length as it is better to
7072 overestimate the length than to underestimate it. */
7075 attr_length_millicode_call (rtx insn)
7077 unsigned long distance = -1;
7078 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7080 if (INSN_ADDRESSES_SET_P ())
7082 distance = (total + insn_current_reference_address (insn));
7083 if (distance < total)
7089 if (!TARGET_LONG_CALLS && distance < 7600000)
7094 else if (TARGET_PORTABLE_RUNTIME)
7098 if (!TARGET_LONG_CALLS && distance < 240000)
7101 if (TARGET_LONG_ABS_CALL && !flag_pic)
7108 /* INSN is a function call. It may have an unconditional jump
7111 CALL_DEST is the routine we are calling. */
7114 output_millicode_call (rtx insn, rtx call_dest)
7116 int attr_length = get_attr_length (insn);
7117 int seq_length = dbr_sequence_length ();
7122 xoperands[0] = call_dest;
7123 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7125 /* Handle the common case where we are sure that the branch will
7126 reach the beginning of the $CODE$ subspace. The within reach
7127 form of the $$sh_func_adrs call has a length of 28. Because
7128 it has an attribute type of multi, it never has a nonzero
7129 sequence length. The length of the $$sh_func_adrs is the same
7130 as certain out of reach PIC calls to other routines. */
7131 if (!TARGET_LONG_CALLS
7132 && ((seq_length == 0
7133 && (attr_length == 12
7134 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7135 || (seq_length != 0 && attr_length == 8)))
7137 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7143 /* It might seem that one insn could be saved by accessing
7144 the millicode function using the linkage table. However,
7145 this doesn't work in shared libraries and other dynamically
7146 loaded objects. Using a pc-relative sequence also avoids
7147 problems related to the implicit use of the gp register. */
7148 output_asm_insn ("b,l .+8,%%r1", xoperands);
7152 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7153 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7157 xoperands[1] = gen_label_rtx ();
7158 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7159 targetm.asm_out.internal_label (asm_out_file, "L",
7160 CODE_LABEL_NUMBER (xoperands[1]));
7161 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7164 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7166 else if (TARGET_PORTABLE_RUNTIME)
7168 /* Pure portable runtime doesn't allow be/ble; we also don't
7169 have PIC support in the assembler/linker, so this sequence is needed. */
7172 /* Get the address of our target into %r1. */
7173 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7174 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7176 /* Get our return address into %r31. */
7177 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7178 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7180 /* Jump to our target address in %r1. */
7181 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7185 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7187 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7189 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7193 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7194 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7196 if (TARGET_SOM || !TARGET_GAS)
7198 /* The HP assembler can generate relocations for the
7199 difference of two symbols. GAS can do this for a
7200 millicode symbol but not an arbitrary external
7201 symbol when generating SOM output. */
7202 xoperands[1] = gen_label_rtx ();
7203 targetm.asm_out.internal_label (asm_out_file, "L",
7204 CODE_LABEL_NUMBER (xoperands[1]));
7205 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7206 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7210 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7211 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7215 /* Jump to our target address in %r1. */
7216 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7220 if (seq_length == 0)
7221 output_asm_insn ("nop", xoperands);
7223 /* We are done if there isn't a jump in the delay slot. */
7224 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7227 /* This call has an unconditional jump in its delay slot. */
7228 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7230 /* See if the return address can be adjusted. Use the containing
7231 sequence insn's address. */
7232 if (INSN_ADDRESSES_SET_P ())
7234 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7235 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7236 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7238 if (VAL_14_BITS_P (distance))
7240 xoperands[1] = gen_label_rtx ();
7241 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7242 targetm.asm_out.internal_label (asm_out_file, "L",
7243 CODE_LABEL_NUMBER (xoperands[1]));
7246 /* ??? This branch may not reach its target. */
7247 output_asm_insn ("nop\n\tb,n %0", xoperands);
7250 /* ??? This branch may not reach its target. */
7251 output_asm_insn ("nop\n\tb,n %0", xoperands);
7253 /* Delete the jump. */
7254 SET_INSN_DELETED (NEXT_INSN (insn));
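/* For illustration, a short in-reach call to the standard $$mulI
   millicode routine comes out of the fast path above as simply

	bl $$mulI,%r31		; millicode return point is %r31, not %r2
	nop			; delay slot, if nothing was filled

   while the long forms first materialize the routine's address in %r1
   (absolutely, pc-relatively, or via the portable runtime sequence)
   and then branch through it.  */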
7259 /* Return the attribute length of the call instruction INSN. The SIBCALL
7260 flag indicates whether INSN is a regular call or a sibling call. The
7261 length returned must be longer than the code actually generated by
7262 output_call. Since branch shortening is done before delay branch
7263 sequencing, there is no way to determine whether or not the delay
7264 slot will be filled during branch shortening. Even when the delay
7265 slot is filled, we may have to add a nop if the delay slot contains
7266 a branch that can't reach its target. Thus, we always have to include
7267 the delay slot in the length estimate. This used to be done in
7268 pa_adjust_insn_length but we do it here now as some sequences always
7269 fill the delay slot and we can save four bytes in the estimate for these sequences. */
7273 attr_length_call (rtx insn, int sibcall)
7279 rtx pat = PATTERN (insn);
7280 unsigned long distance = -1;
7282 if (INSN_ADDRESSES_SET_P ())
7284 unsigned long total;
7286 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7287 distance = (total + insn_current_reference_address (insn));
7288 if (distance < total)
7292 /* Determine if this is a local call. */
7293 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7294 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7296 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7298 call_decl = SYMBOL_REF_DECL (call_dest);
7299 local_call = call_decl && targetm.binds_local_p (call_decl);
7301 /* pc-relative branch. */
7302 if (!TARGET_LONG_CALLS
7303 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7304 || distance < 240000))
7307 /* 64-bit plabel sequence. */
7308 else if (TARGET_64BIT && !local_call)
7309 length += sibcall ? 28 : 24;
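/* The 28 (sibcall) or 24 bytes charged above cover the 64-bit plabel
   sequence emitted by output_call: loading the descriptor address from
   the linkage table, fetching the entry point and the new DLT pointer
   from the descriptor, and the bve itself.  */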
7311 /* non-pic long absolute branch sequence. */
7312 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7315 /* long pc-relative branch sequence. */
7316 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7317 || (TARGET_64BIT && !TARGET_GAS)
7318 || (TARGET_GAS && !TARGET_SOM
7319 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7323 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7327 /* 32-bit plabel sequence. */
7333 length += length_fp_args (insn);
7343 if (!TARGET_NO_SPACE_REGS)
7351 /* INSN is a function call. It may have an unconditional jump
7354 CALL_DEST is the routine we are calling. */
7357 output_call (rtx insn, rtx call_dest, int sibcall)
7359 int delay_insn_deleted = 0;
7360 int delay_slot_filled = 0;
7361 int seq_length = dbr_sequence_length ();
7362 tree call_decl = SYMBOL_REF_DECL (call_dest);
7363 int local_call = call_decl && targetm.binds_local_p (call_decl);
7366 xoperands[0] = call_dest;
7368 /* Handle the common case where we're sure that the branch will reach
7369 the beginning of the "$CODE$" subspace. This is the beginning of
7370 the current function if we are in a named section. */
7371 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7373 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7374 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7378 if (TARGET_64BIT && !local_call)
7380 /* ??? As far as I can tell, the HP linker doesn't support the
7381 long pc-relative sequence described in the 64-bit runtime
7382 architecture. So, we use a slightly longer indirect call. */
7383 xoperands[0] = get_deferred_plabel (call_dest);
7384 xoperands[1] = gen_label_rtx ();
7386 /* If this isn't a sibcall, we put the load of %r27 into the
7387 delay slot. We can't do this in a sibcall as we don't
7388 have a second call-clobbered scratch register available. */
7390 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7393 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7396 /* Now delete the delay insn. */
7397 SET_INSN_DELETED (NEXT_INSN (insn));
7398 delay_insn_deleted = 1;
7401 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7402 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7403 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7407 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7408 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7409 output_asm_insn ("bve (%%r1)", xoperands);
7413 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7414 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7415 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7416 delay_slot_filled = 1;
7421 int indirect_call = 0;
7423 /* Emit a long call. There are several different sequences
7424 of increasing length and complexity. In most cases,
7425 they don't allow an instruction in the delay slot. */
7426 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7427 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7428 && !(TARGET_GAS && !TARGET_SOM
7429 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7434 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7436 && (!TARGET_PA_20 || indirect_call))
7438 /* A non-jump insn in the delay slot. By definition we can
7439 emit this insn before the call (and in fact before argument relocation). */
7441 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7444 /* Now delete the delay insn. */
7445 SET_INSN_DELETED (NEXT_INSN (insn));
7446 delay_insn_deleted = 1;
7449 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7451 /* This is the best sequence for making long calls in
7452 non-pic code. Unfortunately, GNU ld doesn't provide
7453 the stub needed for external calls, and GAS's support
7454 for this with the SOM linker is buggy. It is safe
7455 to use this for local calls. */
7456 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7458 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7462 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7465 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7467 output_asm_insn ("copy %%r31,%%r2", xoperands);
7468 delay_slot_filled = 1;
7473 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7474 || (TARGET_64BIT && !TARGET_GAS))
7476 /* The HP assembler and linker can handle relocations
7477 for the difference of two symbols. GAS and the HP
7478 linker can't do this when one of the symbols is external. */
7480 xoperands[1] = gen_label_rtx ();
7481 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7482 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7483 targetm.asm_out.internal_label (asm_out_file, "L",
7484 CODE_LABEL_NUMBER (xoperands[1]));
7485 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7487 else if (TARGET_GAS && !TARGET_SOM
7488 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7490 /* GAS currently can't generate the relocations that
7491 are needed for the SOM linker under HP-UX using this
7492 sequence. The GNU linker doesn't generate the stubs
7493 that are needed for external calls on TARGET_ELF32
7494 with this sequence. For now, we have to use a
7495 longer plabel sequence when using GAS. */
7496 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7497 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7499 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7504 /* Emit a long plabel-based call sequence. This is
7505 essentially an inline implementation of $$dyncall.
7506 We don't actually try to call $$dyncall as this is
7507 as difficult as calling the function itself. */
7508 xoperands[0] = get_deferred_plabel (call_dest);
7509 xoperands[1] = gen_label_rtx ();
7511 /* Since the call is indirect, FP arguments in registers
7512 need to be copied to the general registers. Then, the
7513 argument relocation stub will copy them back. */
7515 copy_fp_args (insn);
7519 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7520 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7521 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7525 output_asm_insn ("addil LR'%0-$global$,%%r27",
7527 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7531 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7532 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7533 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7534 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7536 if (!sibcall && !TARGET_PA_20)
7538 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7539 if (TARGET_NO_SPACE_REGS)
7540 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7542 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7549 output_asm_insn ("bve (%%r1)", xoperands);
7554 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7555 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7556 delay_slot_filled = 1;
7559 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7564 if (!TARGET_NO_SPACE_REGS)
7565 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7570 if (TARGET_NO_SPACE_REGS)
7571 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7573 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7577 if (TARGET_NO_SPACE_REGS)
7578 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7580 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7583 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7585 output_asm_insn ("copy %%r31,%%r2", xoperands);
7586 delay_slot_filled = 1;
7593 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7594 output_asm_insn ("nop", xoperands);
7596 /* We are done if there isn't a jump in the delay slot. */
7598 || delay_insn_deleted
7599 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7602 /* A sibcall should never have a branch in the delay slot. */
7603 gcc_assert (!sibcall);
7605 /* This call has an unconditional jump in its delay slot. */
7606 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7608 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7610 /* See if the return address can be adjusted. Use the containing
7611 sequence insn's address. */
7612 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7613 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7614 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7616 if (VAL_14_BITS_P (distance))
7618 xoperands[1] = gen_label_rtx ();
7619 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7620 targetm.asm_out.internal_label (asm_out_file, "L",
7621 CODE_LABEL_NUMBER (xoperands[1]));
7624 output_asm_insn ("nop\n\tb,n %0", xoperands);
7627 output_asm_insn ("b,n %0", xoperands);
7629 /* Delete the jump. */
7630 SET_INSN_DELETED (NEXT_INSN (insn));
7635 /* Return the attribute length of the indirect call instruction INSN.
7636 The length must match the code generated by output_indirect_call.
7637 The returned length includes the delay slot. Currently, the delay
7638 slot of an indirect call sequence is not exposed and it is used by
7639 the sequence itself. */
7642 attr_length_indirect_call (rtx insn)
7644 unsigned long distance = -1;
7645 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7647 if (INSN_ADDRESSES_SET_P ())
7649 distance = (total + insn_current_reference_address (insn));
7650 if (distance < total)
7657 if (TARGET_FAST_INDIRECT_CALLS
7658 || (!TARGET_PORTABLE_RUNTIME
7659 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7660 || distance < 240000)))
7666 if (TARGET_PORTABLE_RUNTIME)
7669 /* Out of reach, can use ble. */
7674 output_indirect_call (rtx insn, rtx call_dest)
7680 xoperands[0] = call_dest;
7681 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7682 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7686 /* First the special case for kernels, level 0 systems, etc. */
7687 if (TARGET_FAST_INDIRECT_CALLS)
7688 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7690 /* Now the normal case -- we can reach $$dyncall directly or
7691 we're sure that we can get there via a long-branch stub.
7693 No need to check target flags as the length uniquely identifies
7694 the remaining cases. */
7695 if (attr_length_indirect_call (insn) == 8)
7697 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7698 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7699 variant of the B,L instruction can't be used on the SOM target. */
7700 if (TARGET_PA_20 && !TARGET_SOM)
7701 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7703 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7706 /* Long millicode call, but we are not generating PIC or portable runtime code. */
7708 if (attr_length_indirect_call (insn) == 12)
7709 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7711 /* Long millicode call for portable runtime. */
7712 if (attr_length_indirect_call (insn) == 20)
7713 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7715 /* We need a long PIC call to $$dyncall. */
7716 xoperands[0] = NULL_RTX;
7717 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7718 if (TARGET_SOM || !TARGET_GAS)
7720 xoperands[0] = gen_label_rtx ();
7721 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7722 targetm.asm_out.internal_label (asm_out_file, "L",
7723 CODE_LABEL_NUMBER (xoperands[0]));
7724 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7728 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7729 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7732 output_asm_insn ("blr %%r0,%%r2", xoperands);
7733 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7737 /* Return the total length of the save and restore instructions needed for
7738 the data linkage table pointer (i.e., the PIC register) across the call
7739 instruction INSN. No-return calls do not require a save and restore.
7740 In addition, we may be able to avoid the save and restore for calls
7741 within the same translation unit. */
7744 attr_length_save_restore_dltp (rtx insn)
7746 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7752 /* In HPUX 8.0's shared library scheme, special relocations are needed
7753 for function labels if they might be passed to a function
7754 in a shared library (because shared libraries don't live in code
7755 space), and special magic is needed to construct their address. */
7758 hppa_encode_label (rtx sym)
7760 const char *str = XSTR (sym, 0);
7761 int len = strlen (str) + 1;
7764 p = newstr = alloca (len + 1);
7768 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7772 pa_encode_section_info (tree decl, rtx rtl, int first)
7774 default_encode_section_info (decl, rtl, first);
7776 if (first && TEXT_SPACE_P (decl))
7778 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7779 if (TREE_CODE (decl) == FUNCTION_DECL)
7780 hppa_encode_label (XEXP (rtl, 0));
7784 /* This is sort of the inverse of pa_encode_section_info. */
7787 pa_strip_name_encoding (const char *str)
7789 str += (*str == '@');
7790 str += (*str == '*');
7795 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7797 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7800 /* Returns 1 if OP is a function label involved in a simple addition
7801 with a constant. Used to keep certain patterns from matching
7802 during instruction combination. */
7804 is_function_label_plus_const (rtx op)
7806 /* Strip off any CONST. */
7807 if (GET_CODE (op) == CONST)
7810 return (GET_CODE (op) == PLUS
7811 && function_label_operand (XEXP (op, 0), Pmode)
7812 && GET_CODE (XEXP (op, 1)) == CONST_INT);
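/* For example, (const (plus (symbol_ref "@foo") (const_int 4))) makes
   this function return 1: the CONST wrapper is stripped, leaving a
   PLUS of an encoded function label (the "@" prefix applied by
   hppa_encode_label) and a CONST_INT.  */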
7815 /* Output assembly code for a thunk to FUNCTION. */
7818 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7819 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7822 static unsigned int current_thunk_number;
7823 int val_14 = VAL_14_BITS_P (delta);
7828 xoperands[0] = XEXP (DECL_RTL (function), 0);
7829 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
7830 xoperands[2] = GEN_INT (delta);
7832 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
7833 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7835 /* Output the thunk. We know that the function is in the same
7836 translation unit (i.e., the same space) as the thunk, and that
7837 thunks are output after their method. Thus, we don't need an
7838 external branch to reach the function. With SOM and GAS,
7839 functions and thunks are effectively in different sections.
7840 Thus, we can always use an IA-relative branch and the linker
7841 will add a long branch stub if necessary.
7843 However, we have to be careful when generating PIC code on the
7844 SOM port to ensure that the sequence does not transfer to an
7845 import stub for the target function as this could clobber the
7846 return value saved at SP-24. This would also apply to the
7847 32-bit linux port if the multi-space model is implemented. */
7848 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7849 && !(flag_pic && TREE_PUBLIC (function))
7850 && (TARGET_GAS || last_address < 262132))
7851 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7852 && ((targetm.have_named_sections
7853 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7854 /* The GNU 64-bit linker has rather poor stub management.
7855 So, we use a long branch from thunks that aren't in
7856 the same section as the target function. */
7858 && (DECL_SECTION_NAME (thunk_fndecl)
7859 != DECL_SECTION_NAME (function)))
7860 || ((DECL_SECTION_NAME (thunk_fndecl)
7861 == DECL_SECTION_NAME (function))
7862 && last_address < 262132)))
7863 || (!targetm.have_named_sections && last_address < 262132))))
7866 output_asm_insn ("addil L'%2,%%r26", xoperands);
7868 output_asm_insn ("b %0", xoperands);
7872 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7877 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7881 else if (TARGET_64BIT)
7883 /* We only have one call-clobbered scratch register, so we can't
7884 make use of the delay slot if delta doesn't fit in 14 bits. */
7887 output_asm_insn ("addil L'%2,%%r26", xoperands);
7888 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7891 output_asm_insn ("b,l .+8,%%r1", xoperands);
7895 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7896 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7900 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
7901 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
7906 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7907 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7912 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
7916 else if (TARGET_PORTABLE_RUNTIME)
7918 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7919 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
7922 output_asm_insn ("addil L'%2,%%r26", xoperands);
7924 output_asm_insn ("bv %%r0(%%r22)", xoperands);
7928 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7933 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7937 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7939 /* The function is accessible from outside this module. The only
7940 way to avoid an import stub between the thunk and function is to
7941 call the function directly with an indirect sequence similar to
7942 that used by $$dyncall. This is possible because $$dyncall acts
7943 as the import stub in an indirect call. */
7944 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7945 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
7946 output_asm_insn ("addil LT'%3,%%r19", xoperands);
7947 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
7948 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7949 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
7950 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
7951 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
7952 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
7956 output_asm_insn ("addil L'%2,%%r26", xoperands);
7962 output_asm_insn ("bve (%%r22)", xoperands);
7965 else if (TARGET_NO_SPACE_REGS)
7967 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
7972 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
7973 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
7974 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
7979 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
7981 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
7985 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7987 if (TARGET_SOM || !TARGET_GAS)
7989 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
7990 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
7994 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7995 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
7999 output_asm_insn ("addil L'%2,%%r26", xoperands);
8001 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8005 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8010 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8017 output_asm_insn ("addil L'%2,%%r26", xoperands);
8019 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8020 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8024 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8029 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8034 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8036 if (TARGET_SOM && TARGET_GAS)
8038 /* We're done with this subspace except possibly for some additional
8039 debug information. Forget that we are in this subspace to ensure
8040 that the next function is output in its own subspace. */
8042 cfun->machine->in_nsubspa = 2;
8045 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8047 switch_to_section (data_section);
8048 output_asm_insn (".align 4", xoperands);
8049 ASM_OUTPUT_LABEL (file, label);
8050 output_asm_insn (".word P'%0", xoperands);
8053 current_thunk_number++;
8054 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8055 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8056 last_address += nbytes;
8057 update_total_code_bytes (nbytes);
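/* The rounding above keeps last_address aligned for the next function:
   e.g. with a FUNCTION_BOUNDARY of 32 bits (4 bytes), a thunk of 26
   bytes is accounted as 28 bytes.  */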
8060 /* Only direct calls to static functions are allowed to be sibling (tail)
8063 This restriction is necessary because some linker generated stubs will
8064 store return pointers into rp' in some cases which might clobber a
8065 live value already in rp'.
8067 In a sibcall the current function and the target function share stack
8068 space. Thus if the path to the current function and the path to the
8069 target function save a value in rp', they save the value into the
8070 same stack slot, which has undesirable consequences.
8072 Because of the deferred binding nature of shared libraries any function
8073 with external scope could be in a different load module and thus require
8074 rp' to be saved when calling that function. So sibcall optimizations
8075 can only be safe for static functions.
8077 Note that GCC never needs return value relocations, so we don't have to
8078 worry about static calls with return value relocations (which require
8081 It is safe to perform a sibcall optimization when the target function
8082 will never return. */
8084 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8086 if (TARGET_PORTABLE_RUNTIME)
8089 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8090 single subspace mode and the call is not indirect. As far as I know,
8091 there is no operating system support for the multiple subspace mode.
8092 It might be possible to support indirect calls if we didn't use
8093 $$dyncall (see the indirect sequence generated in output_call). */
8095 return (decl != NULL_TREE);
8097 /* Sibcalls are not ok because the arg pointer register is not a fixed
8098 register. This prevents the sibcall optimization from occurring. In
8099 addition, there are problems with stub placement using GNU ld. This
8100 is because a normal sibcall branch uses a 17-bit relocation while
8101 a regular call branch uses a 22-bit relocation. As a result, more
8102 care needs to be taken in the placement of long-branch stubs. */
8106 /* Sibcalls are only ok within a translation unit. */
8107 return (decl && !TREE_PUBLIC (decl));
8110 /* ??? Addition is not commutative on the PA due to the weird implicit
8111 space register selection rules for memory addresses. Therefore, we
8112 don't consider a + b == b + a, as this might be inside a MEM. */
8114 pa_commutative_p (rtx x, int outer_code)
8116 return (COMMUTATIVE_P (x)
8117 && (TARGET_NO_SPACE_REGS
8118 || (outer_code != UNKNOWN && outer_code != MEM)
8119 || GET_CODE (x) != PLUS));
8122 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8123 use in fmpyadd instructions. */
8125 fmpyaddoperands (rtx *operands)
8127 enum machine_mode mode = GET_MODE (operands[0]);
8129 /* Must be a floating point mode. */
8130 if (mode != SFmode && mode != DFmode)
8133 /* All modes must be the same. */
8134 if (! (mode == GET_MODE (operands[1])
8135 && mode == GET_MODE (operands[2])
8136 && mode == GET_MODE (operands[3])
8137 && mode == GET_MODE (operands[4])
8138 && mode == GET_MODE (operands[5])))
8141 /* All operands must be registers. */
8142 if (! (GET_CODE (operands[1]) == REG
8143 && GET_CODE (operands[2]) == REG
8144 && GET_CODE (operands[3]) == REG
8145 && GET_CODE (operands[4]) == REG
8146 && GET_CODE (operands[5]) == REG))
8149 /* Only 2 real operands to the addition. One of the input operands must
8150 be the same as the output operand. */
8151 if (! rtx_equal_p (operands[3], operands[4])
8152 && ! rtx_equal_p (operands[3], operands[5]))
8155 /* Inout operand of add cannot conflict with any operands from multiply. */
8156 if (rtx_equal_p (operands[3], operands[0])
8157 || rtx_equal_p (operands[3], operands[1])
8158 || rtx_equal_p (operands[3], operands[2]))
8161 /* multiply cannot feed into addition operands. */
8162 if (rtx_equal_p (operands[4], operands[0])
8163 || rtx_equal_p (operands[5], operands[0]))
8166 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8168 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8169 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8170 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8171 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8172 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8173 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8176 /* Passed. Operands are suitable for fmpyadd. */
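/* fmpyadd launches an independent FP multiply and FP add as a single
   instruction.  The checks above keep the two halves truly independent:
   if the add's inout operand overlapped a multiply operand, or the
   multiply result fed the add, the parallel semantics of the combined
   instruction would differ from the two sequential insns it replaces.  */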
8180 #if !defined(USE_COLLECT2)
8182 pa_asm_out_constructor (rtx symbol, int priority)
8184 if (!function_label_operand (symbol, VOIDmode))
8185 hppa_encode_label (symbol);
8187 #ifdef CTORS_SECTION_ASM_OP
8188 default_ctor_section_asm_out_constructor (symbol, priority);
8190 # ifdef TARGET_ASM_NAMED_SECTION
8191 default_named_section_asm_out_constructor (symbol, priority);
8193 default_stabs_asm_out_constructor (symbol, priority);
8199 pa_asm_out_destructor (rtx symbol, int priority)
8201 if (!function_label_operand (symbol, VOIDmode))
8202 hppa_encode_label (symbol);
8204 #ifdef DTORS_SECTION_ASM_OP
8205 default_dtor_section_asm_out_destructor (symbol, priority);
8207 # ifdef TARGET_ASM_NAMED_SECTION
8208 default_named_section_asm_out_destructor (symbol, priority);
8210 default_stabs_asm_out_destructor (symbol, priority);
8216 /* This function places uninitialized global data in the bss section.
8217 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8218 function on the SOM port to prevent uninitialized global data from
8219 being placed in the data section. */
8222 pa_asm_output_aligned_bss (FILE *stream,
8224 unsigned HOST_WIDE_INT size,
8227 switch_to_section (bss_section);
8228 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8230 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8231 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8234 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8235 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8238 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8239 ASM_OUTPUT_LABEL (stream, name);
8240 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8243 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8244 that doesn't allow the alignment of global common storage to be directly
8245 specified. The SOM linker aligns common storage based on the rounded
8246 value of the NUM_BYTES parameter in the .comm directive. It's not
8247 possible to use the .align directive as it doesn't affect the alignment
8248 of the label associated with a .comm directive. */
8251 pa_asm_output_aligned_common (FILE *stream,
8253 unsigned HOST_WIDE_INT size,
8256 unsigned int max_common_align;
8258 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8259 if (align > max_common_align)
8261 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8262 "for global common data. Using %u",
8263 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8264 align = max_common_align;
8267 switch_to_section (bss_section);
8269 assemble_name (stream, name);
8270 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8271 MAX (size, align / BITS_PER_UNIT));
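/* For example, 5 bytes of common data requiring 8-byte alignment are
   emitted as ".comm 8": the SOM linker derives the alignment from the
   rounded size, which is why we request MAX (size, align / BITS_PER_UNIT)
   bytes rather than SIZE itself.  */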
8274 /* We can't use .comm for local common storage as the SOM linker effectively
8275 treats the symbol as universal and uses the same storage for local symbols
8276 with the same name in different object files. The .block directive
8277 reserves an uninitialized block of storage. However, it's not common
8278 storage. Fortunately, GCC never requests common storage with the same
8279 name in any given translation unit. */
8282 pa_asm_output_aligned_local (FILE *stream,
8284 unsigned HOST_WIDE_INT size,
8287 switch_to_section (bss_section);
8288 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8291 fprintf (stream, "%s", LOCAL_ASM_OP);
8292 assemble_name (stream, name);
8293 fprintf (stream, "\n");
8296 ASM_OUTPUT_LABEL (stream, name);
8297 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8300 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8301 use in fmpysub instructions. */
8303 fmpysuboperands (rtx *operands)
8305 enum machine_mode mode = GET_MODE (operands[0]);
8307 /* Must be a floating point mode. */
8308 if (mode != SFmode && mode != DFmode)
8311 /* All modes must be the same. */
8312 if (! (mode == GET_MODE (operands[1])
8313 && mode == GET_MODE (operands[2])
8314 && mode == GET_MODE (operands[3])
8315 && mode == GET_MODE (operands[4])
8316 && mode == GET_MODE (operands[5])))
8319 /* All operands must be registers. */
8320 if (! (GET_CODE (operands[1]) == REG
8321 && GET_CODE (operands[2]) == REG
8322 && GET_CODE (operands[3]) == REG
8323 && GET_CODE (operands[4]) == REG
8324 && GET_CODE (operands[5]) == REG))
8327 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8328 operation, so operands[4] must be the same as operands[3]. */
8329 if (! rtx_equal_p (operands[3], operands[4]))
8332 /* multiply cannot feed into subtraction. */
8333 if (rtx_equal_p (operands[5], operands[0]))
8336 /* Inout operand of sub cannot conflict with any operands from multiply. */
8337 if (rtx_equal_p (operands[3], operands[0])
8338 || rtx_equal_p (operands[3], operands[1])
8339 || rtx_equal_p (operands[3], operands[2]))
8342 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8344 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8345 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8346 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8347 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8348 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8349 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8352 /* Passed. Operands are suitable for fmpysub. */
8356 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8357 constants for shadd instructions. */
8359 shadd_constant_p (int val)
8361 if (val == 2 || val == 4 || val == 8)
return 1;
return 0;
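/* These values correspond to the sh1add, sh2add and sh3add instructions,
   which shift the index register left by 1, 2 or 3 bits before adding.
   E.g. "sh2add %r19,%r20,%r21" computes %r21 = 4 * %r19 + %r20, the
   cheap way to do scaled array indexing on the PA.  */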
8367 /* Return 1 if OP is valid as a base or index register in a
8371 borx_reg_operand (rtx op, enum machine_mode mode)
8373 if (GET_CODE (op) != REG)
8376 /* We must reject virtual registers as the only expressions that
8377 can be instantiated are REG and REG+CONST. */
8378 if (op == virtual_incoming_args_rtx
8379 || op == virtual_stack_vars_rtx
8380 || op == virtual_stack_dynamic_rtx
8381 || op == virtual_outgoing_args_rtx
8382 || op == virtual_cfa_rtx)
8385 /* While it's always safe to index off the frame pointer, it's not
8386 profitable to do so when the frame pointer is being eliminated. */
8387 if (!reload_completed
8388 && flag_omit_frame_pointer
8389 && !current_function_calls_alloca
8390 && op == frame_pointer_rtx)
8393 return register_operand (op, mode);
8396 /* Return 1 if this operand is anything other than a hard register. */
8399 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8401 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8404 /* Return 1 if INSN branches forward. Should be using insn_addresses
8405 to avoid walking through all the insns... */
8407 forward_branch_p (rtx insn)
8409 rtx label = JUMP_LABEL (insn);
8416 while (insn && insn != label) insn = NEXT_INSN (insn); /* walk forward looking for LABEL */
8419 return (insn == label);
8422 /* Return 1 if OP is an equality comparison, else return 0. */
8424 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8426 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8429 /* Return 1 if INSN is in the delay slot of a call instruction. */
8431 jump_in_call_delay (rtx insn)
8434 if (GET_CODE (insn) != JUMP_INSN)
8437 if (PREV_INSN (insn)
8438 && PREV_INSN (PREV_INSN (insn))
8439 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8441 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8443 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8444 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
/* Output an unconditional move and branch insn.  */

const char *
output_parallel_movb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	{
	  /* Nothing in the delay slot, fake it by putting the combined
	     insn (the copy or add) in the delay slot of a bl.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "b %2\n\tldi %1,%0";
	  else
	    return "b %2\n\tcopy %1,%0";
	}
      else
	{
	  /* Something in the delay slot, but we've got a long branch.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "ldi %1,%0\n\tb %2";
	  else
	    return "copy %1,%0\n\tb %2";
	}
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return output_lbranch (operands[2], insn, 1);
}
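/* For illustration only (hypothetical registers and label): when the
   parallel move and branch fits in 4 bytes, the template above emits a
   single insn such as

	movb,tr %r4,%r28,L$0012	; copy %r4 to %r28 and branch always

   using the always-true completer so the branch is unconditional.  */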
/* Output an unconditional add and branch insn.  */

const char *
output_parallel_addb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	/* Nothing in the delay slot, fake it by putting the combined
	   insn (the copy or add) in the delay slot of a bl.  */
	return "b %3\n\tadd%I1 %1,%0,%0";
      else
	/* Something in the delay slot, but we've got a long branch.  */
	return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return output_lbranch (operands[3], insn, 1);
}
/* Return nonzero if INSN (a jump insn) immediately follows a call
   to a named function.  This is used to avoid filling the delay slot
   of the jump since it can usually be eliminated by modifying RP in
   the delay slot of the call.  */

int
following_call (rtx insn)
{
  if (! TARGET_JUMP_IN_DELAY)
    return 0;

  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
	   && get_attr_type (insn) != TYPE_DYNCALL)
	  || (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) != SEQUENCE
	      && GET_CODE (PATTERN (insn)) != USE
	      && GET_CODE (PATTERN (insn)) != CLOBBER
	      && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}
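/* For illustration only (a sketch with hypothetical labels): a sequence
   such as

	bl foo,%r2
	nop
	b,n L$17

   can usually drop the jump entirely by adjusting the return pointer in
   the call's delay slot, conceptually

	bl foo,%r2
	ldo L$17-L$ret(%r2),%r2	; make foo "return" to L$17

   which is why we avoid filling the jump's delay slot here.  */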
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- e.g. each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by using insns with the attribute
   type TYPE_BTABLE_BRANCH.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes: first, it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */
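/* For illustration only (a sketch, not from the original sources): a
   branch table such as

	.word L$5
	.word L$6

   conceptually becomes, after this pass,

   L$100:	b L$5
		nop
   L$101:	b L$6
		nop

   giving reorg and the final jump pass a chance to fill each delay
   slot.  */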
static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, tmp, location, label;
	  unsigned int length, i;

	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Emit marker for the beginning of the branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);

	  pattern = PATTERN (insn);
	  location = PREV_INSN (insn);
	  length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

	  for (i = 0; i < length; i++)
	    {
	      /* Emit a label before each jump to keep jump.c from
		 removing this code.  */
	      tmp = gen_label_rtx ();
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);

	      if (GET_CODE (pattern) == ADDR_VEC)
		label = XEXP (XVECEXP (pattern, 0, i), 0);
	      else
		label = XEXP (XVECEXP (pattern, 1, i), 0);

	      tmp = gen_short_jump (label);

	      /* Emit the jump itself.  */
	      tmp = emit_jump_insn_after (tmp, location);
	      JUMP_LABEL (tmp) = label;
	      LABEL_NUSES (label)++;
	      location = NEXT_INSN (location);

	      /* Emit a BARRIER after the jump.  */
	      emit_barrier_after (location);
	      location = NEXT_INSN (location);
	    }

	  /* Emit marker for the end of the branch table.  */
	  emit_insn_before (gen_end_brtab (), location);
	  location = NEXT_INSN (location);
	  emit_barrier_after (location);

	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
	  delete_insn (insn);
	}
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
	 markers disables output of the branch table to readonly memory,
	 and any alignment directives that might be needed.  Possibly,
	 the begin_brtab insn should be output before the label for the
	 table.  This doesn't matter at the moment since the tables are
	 always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
	}
    }
}
/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

   * addb can add two registers or a register and a small integer
   and jump to a nearby (+-8k) location.  Normally the jump to the
   nearby location is conditional on the result of the add, but by
   using the "true" condition we can make the jump unconditional.
   Thus addb can perform two independent operations in one insn.
   (See the illustration after this comment.)

   * movb is similar to addb in that it can perform a reg->reg
   or small immediate->reg copy and jump to a nearby (+-8k) location.

   * fmpyadd and fmpysub can perform an FP multiply and either an
   FP add or FP sub if the operands of the multiply and add/sub are
   independent (there are other minor restrictions).  Note both
   the fmpy and fadd/fsub can in theory move to better spots according
   to data dependencies, but for now we require the fmpy stay at a
   fixed location.

   * Many of the memory operations can perform pre & post updates
   of index registers.  GCC's pre/post increment/decrement addressing
   is far too simple to take advantage of all the possibilities.  This
   pass may not be suitable since those insns may not be independent.

   * comclr can compare two ints or an int and a register, nullify
   the following instruction and zero some other register.  This
   is more difficult to use as it's harder to find an insn which
   will generate a comclr than finding something like an unconditional
   branch.  (conditional moves & long branches create comclr insns).

   * Most arithmetic operations can conditionally skip the next
   instruction.  They can be viewed as "perform this operation
   and conditionally jump to this nearby location" (where nearby
   is an insn away).  These are difficult to use due to the
   branch length restrictions.  */
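/* For illustration only (hypothetical operands): an independent add and
   an unconditional branch

	addi 4,%r3,%r3
	b L$7

   can be combined by this pass into

	addib,tr 4,%r3,L$7

   using the "true" condition so the branch is taken unconditionally,
   performing both operations in a single insn.  */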
static void
pa_combine_instructions (void)
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;
  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);
  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
	   && GET_CODE (anchor) != JUMP_INSN
	   && GET_CODE (anchor) != CALL_INSN)
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx floater;
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		{
		  floater = NULL_RTX;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}
	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }
	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		    {
		      floater = NULL_RTX;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }
	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx_PARALLEL
				(VOIDmode,
				 gen_rtvec (2, PATTERN (anchor),
					    PATTERN (floater))),
				anchor);

	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      rtx temp;

	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp
		= emit_jump_insn_before (gen_rtx_PARALLEL
					 (VOIDmode,
					  gen_rtvec (2, PATTERN (anchor),
						     PATTERN (floater))),
					 anchor);

	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
static int
pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || (extract_insn (new), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
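/* For illustration only: the combined insn tested above has a shape
   roughly like

     (parallel [(set (reg:SF fr22) (mult:SF ...))
		(set (reg:SF fr23) (plus:SF ...))])

   which must match one of the fmpyadd/fmpysub patterns in pa.md and
   satisfy their constraints before the combination is accepted.  */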
/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
int
insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
	   && GET_CODE (PATTERN (insn)) != SEQUENCE
	   && GET_CODE (PATTERN (insn)) != USE
	   && GET_CODE (PATTERN (insn)) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI));
}
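/* For illustration only (hypothetical argument value): because the
   references of a millicode call are reported as delayed, reorg may
   transform

	ldi 42,%r26
	bl $$mulI,%r31
	nop

   into

	bl $$mulI,%r31
	ldi 42,%r26		; argument set up in the delay slot

   with the result coming back in %r29.  */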
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else
   TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

rtx
function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
	{
	  /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && TYPE_PRECISION (valtype) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
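/* For illustration only: on the 32-bit ABI the rules above place an
   "int" return value in %r28, a "long long" in %r28-%r29, and a
   "double" in fr4 (register 32 in this backend's numbering) unless
   soft-float is in effect.  */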
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);
  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }
  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }
  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
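/* For illustration only: with the 32-bit ABI the four argument words
   map to %r26, %r25, %r24 and %r23 in that order (gpr_reg_base is
   26 - cum->words above), so for f(int a, int b) A arrives in %r26
   and B in %r25.  */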
/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
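/* For illustration only (hypothetical sizes): on TARGET_64BIT with
   max_arg_words == 8, a four-word argument starting at cum->words == 6
   has offset 0, does not fit fully (6 + 0 + 4 > 8) and is not fully on
   the stack (6 + 0 < 8), so (8 - 6 - 0) * UNITS_PER_WORD = 16 bytes go
   in registers and the remainder goes on the stack.  */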
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");
  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
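/* For illustration only: a "static const" variable with a constant
   initializer and no relocations lands in readonly_data_section (or in
   the one-only readonly section for SOM comdat data), while anything
   writable or needing relocs falls through to data_section.  */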
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
/* Worker function for TARGET_RETURN_IN_MEMORY.  */

static bool
pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
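/* For illustration only: a 12-byte struct (96 bits) is returned in
   memory on the 32-bit SOM ABI (limit 8 bytes) but in registers on
   PA64 (limit 16 bytes); variable-sized and empty types always go in
   memory because int_size_in_bytes returns -1 or 0 for them.  */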
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct extern_symbol GTY(())
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol pointers.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}
/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}