/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
87 static void pa_option_override (void);
88 static void copy_reg_pointer (rtx, rtx);
89 static void fix_range (const char *);
90 static bool pa_handle_option (size_t, const char *, int);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
				    reg_class_t);
93 static int hppa_address_cost (rtx, bool);
94 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
95 static inline rtx force_mode (enum machine_mode, rtx);
96 static void pa_reorg (void);
97 static void pa_combine_instructions (void);
98 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
99 static bool forward_branch_p (rtx);
100 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
101 static int compute_movmem_length (rtx);
102 static int compute_clrmem_length (rtx);
103 static bool pa_assemble_integer (rtx, unsigned int, int);
104 static void remove_useless_addtr_insns (int);
105 static void store_reg (int, HOST_WIDE_INT, int);
106 static void store_reg_modify (int, int, HOST_WIDE_INT);
107 static void load_reg (int, HOST_WIDE_INT, int);
108 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
109 static rtx pa_function_value (const_tree, const_tree, bool);
110 static rtx pa_libcall_value (enum machine_mode, const_rtx);
111 static bool pa_function_value_regno_p (const unsigned int);
112 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
113 static void update_total_code_bytes (unsigned int);
114 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
115 static int pa_adjust_cost (rtx, rtx, rtx, int);
116 static int pa_adjust_priority (rtx, int);
117 static int pa_issue_rate (void);
118 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
121 static void pa_encode_section_info (tree, rtx, int);
122 static const char *pa_strip_name_encoding (const char *);
123 static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
126 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
127 HOST_WIDE_INT, tree);
128 #if !defined(USE_COLLECT2)
129 static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
132 static void pa_init_builtins (void);
133 static rtx hppa_builtin_saveregs (void);
134 static void hppa_va_start (tree, rtx);
135 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
136 static bool pa_scalar_mode_supported_p (enum machine_mode);
137 static bool pa_commutative_p (const_rtx x, int outer_code);
138 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
139 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
140 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
141 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
142 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
143 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
144 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
145 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
147 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
148 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
149 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
150 static void output_deferred_plabels (void);
151 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
152 #ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
155 #ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
158 static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			    const_tree, bool);
167 static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
168 static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					enum machine_mode,
					secondary_reload_info *);
172 static void pa_extra_live_on_entry (bitmap);
173 static enum machine_mode pa_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
177 static void pa_asm_trampoline_template (FILE *);
178 static void pa_trampoline_init (rtx, tree, rtx);
179 static rtx pa_trampoline_adjust_address (rtx);
180 static rtx pa_delegitimize_address (rtx);
181 static bool pa_print_operand_punct_valid_p (unsigned char);
182 static rtx pa_internal_arg_pointer (void);
183 static bool pa_can_eliminate (const int, const int);
185 /* The following extra sections are only used for SOM. */
186 static GTY(()) section *som_readonly_data_section;
187 static GTY(()) section *som_one_only_readonly_data_section;
188 static GTY(()) section *som_one_only_data_section;
190 /* Which cpu we are scheduling for. */
191 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
193 /* The UNIX standard to use for predefines and linking. */
194 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
196 /* Counts for the number of callee-saved general and floating point
197 registers which were saved by the current function's prologue. */
198 static int gr_saved, fr_saved;
200 /* Boolean indicating whether the return pointer was saved by the
201 current function's prologue. */
202 static bool rp_saved;
204 static rtx find_addr_reg (rtx);
206 /* Keep track of the number of bytes we have output in the CODE subspace
207 during this compilation so we'll know when to emit inline long-calls. */
208 unsigned long total_code_bytes;
210 /* The last address of the previous function plus the number of bytes in
211 associated thunks that have been output. This is used to determine if
212 a thunk can use an IA-relative branch to reach its target function. */
213 static unsigned int last_address;
215 /* Variables to handle plabels that we discover are necessary at assembly
216 output time. They are output after the current function. */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels = NULL;
static size_t n_deferred_plabels = 0;
226 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
static const struct default_options pa_option_optimization_table[] =
  {
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
234 /* Initialize the GCC target structure. */
236 #undef TARGET_OPTION_OVERRIDE
237 #define TARGET_OPTION_OVERRIDE pa_option_override
238 #undef TARGET_OPTION_OPTIMIZATION_TABLE
239 #define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table
241 #undef TARGET_ASM_ALIGNED_HI_OP
242 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
243 #undef TARGET_ASM_ALIGNED_SI_OP
244 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
245 #undef TARGET_ASM_ALIGNED_DI_OP
246 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
247 #undef TARGET_ASM_UNALIGNED_HI_OP
248 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
249 #undef TARGET_ASM_UNALIGNED_SI_OP
250 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
251 #undef TARGET_ASM_UNALIGNED_DI_OP
252 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
253 #undef TARGET_ASM_INTEGER
254 #define TARGET_ASM_INTEGER pa_assemble_integer
256 #undef TARGET_ASM_FUNCTION_PROLOGUE
257 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
258 #undef TARGET_ASM_FUNCTION_EPILOGUE
259 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
261 #undef TARGET_FUNCTION_VALUE
262 #define TARGET_FUNCTION_VALUE pa_function_value
263 #undef TARGET_LIBCALL_VALUE
264 #define TARGET_LIBCALL_VALUE pa_libcall_value
265 #undef TARGET_FUNCTION_VALUE_REGNO_P
266 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
268 #undef TARGET_LEGITIMIZE_ADDRESS
269 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
271 #undef TARGET_SCHED_ADJUST_COST
272 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
273 #undef TARGET_SCHED_ADJUST_PRIORITY
274 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
275 #undef TARGET_SCHED_ISSUE_RATE
276 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
278 #undef TARGET_ENCODE_SECTION_INFO
279 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
280 #undef TARGET_STRIP_NAME_ENCODING
281 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
283 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
284 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
286 #undef TARGET_COMMUTATIVE_P
287 #define TARGET_COMMUTATIVE_P pa_commutative_p
289 #undef TARGET_ASM_OUTPUT_MI_THUNK
290 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
291 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
292 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif
301 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
302 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
304 #if !defined(USE_COLLECT2)
305 #undef TARGET_ASM_CONSTRUCTOR
306 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
307 #undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif
311 #undef TARGET_DEFAULT_TARGET_FLAGS
312 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
313 #undef TARGET_HANDLE_OPTION
314 #define TARGET_HANDLE_OPTION pa_handle_option
316 #undef TARGET_INIT_BUILTINS
317 #define TARGET_INIT_BUILTINS pa_init_builtins
319 #undef TARGET_REGISTER_MOVE_COST
320 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
321 #undef TARGET_RTX_COSTS
322 #define TARGET_RTX_COSTS hppa_rtx_costs
323 #undef TARGET_ADDRESS_COST
324 #define TARGET_ADDRESS_COST hppa_address_cost
326 #undef TARGET_MACHINE_DEPENDENT_REORG
327 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
329 #ifdef HPUX_LONG_DOUBLE_LIBRARY
330 #undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif
334 #undef TARGET_PROMOTE_FUNCTION_MODE
335 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
336 #undef TARGET_PROMOTE_PROTOTYPES
337 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
339 #undef TARGET_STRUCT_VALUE_RTX
340 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
341 #undef TARGET_RETURN_IN_MEMORY
342 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
343 #undef TARGET_MUST_PASS_IN_STACK
344 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
345 #undef TARGET_PASS_BY_REFERENCE
346 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
347 #undef TARGET_CALLEE_COPIES
348 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
349 #undef TARGET_ARG_PARTIAL_BYTES
350 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
351 #undef TARGET_FUNCTION_ARG
352 #define TARGET_FUNCTION_ARG pa_function_arg
353 #undef TARGET_FUNCTION_ARG_ADVANCE
354 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
355 #undef TARGET_FUNCTION_ARG_BOUNDARY
356 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
358 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
359 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
360 #undef TARGET_EXPAND_BUILTIN_VA_START
361 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
362 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
363 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
365 #undef TARGET_SCALAR_MODE_SUPPORTED_P
366 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
368 #undef TARGET_CANNOT_FORCE_CONST_MEM
369 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
371 #undef TARGET_SECONDARY_RELOAD
372 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
374 #undef TARGET_EXTRA_LIVE_ON_ENTRY
375 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
377 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
378 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
379 #undef TARGET_TRAMPOLINE_INIT
380 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
381 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
382 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
383 #undef TARGET_DELEGITIMIZE_ADDRESS
384 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
385 #undef TARGET_INTERNAL_ARG_POINTER
386 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
387 #undef TARGET_CAN_ELIMINATE
388 #define TARGET_CAN_ELIMINATE pa_can_eliminate
390 struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm.except_unwind_info () == UI_DWARF2 && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
612 /* Function to init struct machine_function.
613 This will be called, via a pointer variable,
614 from push_function_context. */
static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
622 /* If FROM is a probable pointer register, mark TO as a probable
623 pointer register with the same pointer alignment as FROM. */
static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
632 /* Return 1 if X contains a symbolic expression. We know these
633 expressions will have one of a few well defined forms, so
634 we need only check those forms. */
int
symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}
/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || ldil_cint_p (ival)
	  || zdepi_cint_p (ival));
}
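
/* Added note (not part of the original sources): ldo covers 14-bit signed
   immediates, ldil covers constants whose low 11 bits are zero (see
   ldil_cint_p below), and zdepi covers values formed by depositing a
   sign-extended 5-bit immediate (see zdepi_cint_p below).  */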
/* Return truth value of whether OP can be used as an operand in a
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}
667 /* True iff the operand OP can be used as the destination operand of
668 an integer store. This also implies the operand could be used as
669 the source operand of an integer load. Symbolic, lo_sum and indexed
670 memory operands are not allowed. We accept reloading pseudos and
671 other memory operands. */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
	   && REG_P (op)
	   && REGNO (op) >= FIRST_PSEUDO_REGISTER
	   && reg_renumber [REGNO (op)] < 0)
	  || (GET_CODE (op) == MEM
	      && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
	      && !symbolic_memory_operand (op, VOIDmode)
	      && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
	      && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}
686 /* True iff ldil can be used to load this CONST_INT. The least
687 significant 11 bits of the value must be zero and the value must
688 not change sign when extended from 32 to 64 bits. */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
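
/* Illustrative examples (not part of the original sources): 0x12345800 is
   accepted, since its low 11 bits are zero and it is unchanged by sign
   extension from 32 to 64 bits, while the 64-bit value 0x0000000080000000
   is rejected because ldil would yield the sign-extended value
   0xffffffff80000000 instead.  */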
697 /* True iff zdepi can be used to generate this CONST_INT.
698 zdepi first sign extends a 5-bit signed number to a given field
699 length, then places this field anywhere in a zero. */
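   Added explanatory note (not part of the original sources): the test
   below isolates the lowest set bit of X and checks that
   ((X >> 4) + lsb_mask), with the bits below that lowest bit dropped,
   is a power of two; this is the shape a deposited, sign-extended 5-bit
   immediate can have.  For instance, any contiguous run of ones such as
   0x000ff000 (a deposit of -1 with field length 8) passes.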
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns in which the zero bits of MASK form one
   contiguous field.  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
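
/* Added explanatory note (not part of the original sources): complementing
   MASK turns its field of zeros into the only set bits; adding the lowest
   set bit then clears that run, so the power-of-two test succeeds exactly
   when the zeros of the original MASK were contiguous.  */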
726 /* True iff depi can be used to compute (reg | MASK). */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
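
/* Added explanatory note (not part of the original sources): the same
   trick as in and_mask_p, applied to MASK directly; it accepts exactly
   those masks whose set bits form one contiguous run, which is the shape
   depi can deposit in a single instruction.  */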
734 /* Legitimize PIC addresses. If the address is already
735 position-independent, we return ORIG. Newly generated
736 position-independent addresses go to REG. If we need more
737 than one register, we lose. */
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

746 /* Labels need special handling. */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

751 /* We do not want to go through the movXX expanders here since that
752 would create recursion.
754 Nor do we really want to call a generator for a named pattern
755 since that requires multiple patterns if we want to support
758 So instead we just emit the raw set, which avoids the movXX
759 expanders completely. */
760 mark_reg_pointer (reg, BITS_PER_UNIT);
761 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
763 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
764 add_reg_note (insn, REG_EQUAL, orig);
766 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
767 and update LABEL_NUSES because this is not done automatically. */
768 if (reload_in_progress || reload_completed)
770 /* Extract LABEL_REF. */
771 if (GET_CODE (orig) == CONST)
772 orig = XEXP (XEXP (orig, 0), 0);
773 /* Extract CODE_LABEL. */
774 orig = XEXP (orig, 0);
775 add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

787 /* Before reload, allocate a temporary register for the intermediate
788 result. This allows the sequence to be deleted when the final
789 result is unused and the insns are trivially dead. */
790 tmp_reg = ((reload_in_progress || reload_completed)
791 ? reg : gen_reg_rtx (Pmode));
      if (function_label_operand (orig, mode))
	{
795 /* Force function label into memory in word mode. */
796 orig = XEXP (force_const_mem (word_mode, orig), 0);
797 /* Load plabel address from DLT. */
798 emit_move_insn (tmp_reg,
799 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));

807 emit_move_insn (reg, pic_ref);
808 /* Now load address of function descriptor. */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
813 /* Load symbol reference from DLT. */
814 emit_move_insn (tmp_reg,
815 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

825 crtl->uses_pic_offset_table = 1;
826 mark_reg_pointer (reg, BITS_PER_UNIT);
827 insn = emit_move_insn (reg, pic_ref);
829 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
845 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
846 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
847 base == reg ? 0 : reg);
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
862 static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
945 /* Try machine-dependent ways of modifying an illegitimate address
946 to be legitimate. If we find one, return the new, valid address.
947 This macro is used in only one place: `memory_address' in explow.c.
949 OLDX is the address as it was before break_out_memory_refs was called.
950 In some cases it is useful to look at this to decide what needs to be done.
952 It is always safe for this macro to do nothing. It exists to recognize
953 opportunities to optimize the output.
955 For the PA, transform:
	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));
968 This is for CSE to find several similar references, and only use one Z.
970 X can either be a SYMBOL_REF or REG, but because combine cannot
971 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
972 D will not fit in 14 bits.
   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.
980 This relies on the fact that most mode MODE_FLOAT references will use FP
981 registers and most mode MODE_INT references will use integer registers.
982 (In the rare case of an FP register used in an integer MODE, we depend
983 on secondary reloads to clean things up.)
986 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
987 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
988 addressing modes to be used).
   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

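  /* Worked example (added, not part of the original sources): for a
     MODE_INT reference memory (SYM + 0x12345) the mask is 0x3fff;
     0x12345 & 0x3fff = 0x2345 >= 0x2000, so Y rounds up to 0x14000,
     Z = SYM + 0x14000, and the reference becomes memory (Z + -0x1cbb),
     whose displacement fits in 14 bits.  */
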
999 /* We need to canonicalize the order of operands in unscaled indexed
1000 addresses since the code that checks if an address is valid doesn't
1001 always try both orders. */
1002 if (!TARGET_NO_SPACE_REGS
1003 && GET_CODE (x) == PLUS
1004 && GET_MODE (x) == Pmode
1005 && REG_P (XEXP (x, 0))
1006 && REG_P (XEXP (x, 1))
1007 && REG_POINTER (XEXP (x, 0))
1008 && !REG_POINTER (XEXP (x, 1)))
1009 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1011 if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1016 /* Strip off CONST. */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);
1020 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1021 That should always be safe. */
1022 if (GET_CODE (x) == PLUS
1023 && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }
1030 /* Note we must reject symbols which represent function addresses
1031 since the assembler/linker can't handle arithmetic on plabels. */
1032 if (GET_CODE (x) == PLUS
1033 && GET_CODE (XEXP (x, 1)) == CONST_INT
1034 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1035 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

1046 /* Choose which way to round the offset. Round up if we
1047 are >= halfway to the next boundary. */
1048 if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);
1053 /* If the newoffset will not fit in 14 bits (ldo), then
1054 handling this would take 4 or 5 instructions (2 to load
1055 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1056 add the new offset and the SYMBOL_REF.) Combine can
1057 not handle 4->2 or 5->2 combinations, so do not create
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }
1086 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1088 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1089 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1090 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1091 && (OBJECT_P (XEXP (x, 1))
1092 || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }
1113 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1115 Only do so for floating point modes since this is more speculative
1116 and we lose if it's an integer store. */
1117 if (GET_CODE (x) == PLUS
1118 && GET_CODE (XEXP (x, 0)) == PLUS
1119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1120 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1121 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1122 && (mode == SFmode || mode == DFmode))
1125 /* First, try and figure out what to use as a base register. */
1126 rtx reg1, reg2, base, idx;
1128 reg1 = XEXP (XEXP (x, 0), 1);
1133 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1134 then emit_move_sequence will turn on REG_POINTER so we'll know
1135 it's a base register below. */
1136 if (GET_CODE (reg1) != REG)
1137 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1139 if (GET_CODE (reg2) != REG)
1140 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1142 /* Figure out what the base and index are. */
1144 if (GET_CODE (reg1) == REG
1145 && REG_POINTER (reg1))
1148 idx = gen_rtx_PLUS (Pmode,
1149 gen_rtx_MULT (Pmode,
1150 XEXP (XEXP (XEXP (x, 0), 0), 0),
1151 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1154 else if (GET_CODE (reg2) == REG
1155 && REG_POINTER (reg2))
1164 /* If the index adds a large constant, try to scale the
1165 constant so that it can be loaded with only one insn. */
1166 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1167 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1168 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1169 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1171 /* Divide the CONST_INT by the scale factor, then add it to A. */
1172 int val = INTVAL (XEXP (idx, 1));
1174 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1175 reg1 = XEXP (XEXP (idx, 0), 0);
1176 if (GET_CODE (reg1) != REG)
1177 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1179 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1181 /* We can now generate a simple scaled indexed address. */
1184 (Pmode, gen_rtx_PLUS (Pmode,
1185 gen_rtx_MULT (Pmode, reg1,
1186 XEXP (XEXP (idx, 0), 1)),
1190 /* If B + C is still a valid base register, then add them. */
1191 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1192 && INTVAL (XEXP (idx, 1)) <= 4096
1193 && INTVAL (XEXP (idx, 1)) >= -4096)
1195 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1198 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1200 reg2 = XEXP (XEXP (idx, 0), 0);
1201 if (GET_CODE (reg2) != CONST_INT)
1202 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1204 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1205 gen_rtx_MULT (Pmode,
1211 /* Get the index into a register, then add the base + index and
1212 return a register holding the result. */
1214 /* First get A into a register. */
1215 reg1 = XEXP (XEXP (idx, 0), 0);
1216 if (GET_CODE (reg1) != REG)
1217 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1219 /* And get B into a register. */
1220 reg2 = XEXP (idx, 1);
1221 if (GET_CODE (reg2) != REG)
1222 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1224 reg1 = force_reg (Pmode,
1225 gen_rtx_PLUS (Pmode,
1226 gen_rtx_MULT (Pmode, reg1,
1227 XEXP (XEXP (idx, 0), 1)),
1230 /* Add the result to our base register and return. */
1231 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1235 /* Uh-oh. We might have an address for x[n-100000]. This needs
1236 special handling to avoid creating an indexed memory address
1237 with x-100000 as the base.
1239 If the constant part is small enough, then it's still safe because
1240 there is a guard page at the beginning and end of the data segment.
1242 Scaled references are common enough that we want to try and rearrange the
1243 terms so that we can use indexing for these addresses too. Only
1244 do the optimization for floatint point modes. */
1246 if (GET_CODE (x) == PLUS
1247 && symbolic_expression_p (XEXP (x, 1)))
1249 /* Ugly. We modify things here so that the address offset specified
1250 by the index expression is computed first, then added to x to form
1251 the entire address. */
1253 rtx regx1, regx2, regy1, regy2, y;
1255 /* Strip off any CONST. */
1257 if (GET_CODE (y) == CONST)
1260 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1262 /* See if this looks like
1263 (plus (mult (reg) (shadd_const))
1264 (const (plus (symbol_ref) (const_int))))
1266 Where const_int is small. In that case the const
1267 expression is a valid pointer for indexing.
1269 If const_int is big, but can be divided evenly by shadd_const
1270 and added to (reg). This allows more scaled indexed addresses. */
1271 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1272 && GET_CODE (XEXP (x, 0)) == MULT
1273 && GET_CODE (XEXP (y, 1)) == CONST_INT
1274 && INTVAL (XEXP (y, 1)) >= -4096
1275 && INTVAL (XEXP (y, 1)) <= 4095
1276 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1277 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1279 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1283 if (GET_CODE (reg1) != REG)
1284 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1286 reg2 = XEXP (XEXP (x, 0), 0);
1287 if (GET_CODE (reg2) != REG)
1288 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1290 return force_reg (Pmode,
1291 gen_rtx_PLUS (Pmode,
1292 gen_rtx_MULT (Pmode,
1297 else if ((mode == DFmode || mode == SFmode)
1298 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1299 && GET_CODE (XEXP (x, 0)) == MULT
1300 && GET_CODE (XEXP (y, 1)) == CONST_INT
1301 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1302 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1303 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1306 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1307 / INTVAL (XEXP (XEXP (x, 0), 1))));
1308 regx2 = XEXP (XEXP (x, 0), 0);
1309 if (GET_CODE (regx2) != REG)
1310 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1311 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1315 gen_rtx_PLUS (Pmode,
1316 gen_rtx_MULT (Pmode, regx2,
1317 XEXP (XEXP (x, 0), 1)),
1318 force_reg (Pmode, XEXP (y, 0))));
1320 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1321 && INTVAL (XEXP (y, 1)) >= -4096
1322 && INTVAL (XEXP (y, 1)) <= 4095)
1324 /* This is safe because of the guard page at the
1325 beginning and end of the data space. Just
1326 return the original address. */
1331 /* Doesn't look like one we can optimize. */
1332 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1333 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1334 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1335 regx1 = force_reg (Pmode,
1336 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1338 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1346 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1348 Compute extra cost of moving data between one register class
1351 Make moves from SAR so expensive they should never happen. We used to
1352 have 0xffff here, but that generates overflow in rare cases.
1354 Copies involving a FP register and a non-FP register are relatively
1355 expensive because they must go through memory.
1357 Other copies are reasonably cheap. */
static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}
1372 /* For the HPPA, REG and REG+CONST is cost 0
1373 and addresses involving symbolic constants are cost 2.
1375 PIC addresses are very expensive.
1377 It is no coincidence that this has the same structure
1378 as GO_IF_LEGITIMATE_ADDRESS. */
static int
hppa_address_cost (rtx X,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case SYMBOL_REF:
      return 2;
    default:
      return 4;
    }
}
1397 /* Compute a (partial) cost for rtx X. Return true if the complete
1398 cost has been computed, and false if subexpressions should be
1399 scanned. In either case, *TOTAL contains the cost result. */
static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
		bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
1476 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1477 new rtx with the correct mode. */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}
1489 /* Return 1 if *X is a thread-local symbol. */
static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}
1497 /* Return 1 if X contains a thread-local symbol. */
bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
1508 /* Emit insns to move operands[1] into operands[0].
1510 Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   explicitly.
1514 Note SCRATCH_REG may not be in the proper mode depending on how it
1515 will be used. This routine is responsible for creating a new copy
1516 of SCRATCH_REG in the proper mode. */
int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  rtx tem;

1525 /* We can only handle indexed addresses in the destination operand
1526 of floating point stores. Thus, we need to break out indexed
1527 addresses from the destination operand. */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }
1536 /* On targets with non-equivalent space registers, break out unscaled
1537 indexed addresses from the source operand before the final CSE.
1538 We have to do this because the REG_POINTER flag is not correctly
1539 carried through various optimization passes and CSE may substitute
1540 a pseudo without the pointer set for one with the pointer set. As
1541 a result, we loose various opportunities to create insns with
1542 unscaled indexed addresses. */
1543 if (!TARGET_NO_SPACE_REGS
1544 && !cse_not_expected
1545 && GET_CODE (operand1) == MEM
1546 && GET_CODE (XEXP (operand1, 0)) == PLUS
1547 && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
1554 && reload_in_progress && GET_CODE (operand0) == REG
1555 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1556 operand0 = reg_equiv_mem[REGNO (operand0)];
1557 else if (scratch_reg
1558 && reload_in_progress && GET_CODE (operand0) == SUBREG
1559 && GET_CODE (SUBREG_REG (operand0)) == REG
1560 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1562 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1563 the code which tracks sets/uses for delete_output_reload. */
1564 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1565 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1566 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
1571 && reload_in_progress && GET_CODE (operand1) == REG
1572 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1573 operand1 = reg_equiv_mem[REGNO (operand1)];
1574 else if (scratch_reg
1575 && reload_in_progress && GET_CODE (operand1) == SUBREG
1576 && GET_CODE (SUBREG_REG (operand1)) == REG
1577 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1579 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1580 the code which tracks sets/uses for delete_output_reload. */
1581 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1582 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1583 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

1587 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1588 && ((tem = find_replacement (&XEXP (operand0, 0)))
1589 != XEXP (operand0, 0)))
1590 operand0 = replace_equiv_address (operand0, tem);
1592 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1593 && ((tem = find_replacement (&XEXP (operand1, 0)))
1594 != XEXP (operand1, 0)))
1595 operand1 = replace_equiv_address (operand1, tem);
1597 /* Handle secondary reloads for loads/stores of FP registers from
1598 REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
1601 && fp_reg_operand (operand0, mode)
1602 && ((GET_CODE (operand1) == MEM
1603 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1604 XEXP (operand1, 0)))
1605 || ((GET_CODE (operand1) == SUBREG
1606 && GET_CODE (XEXP (operand1, 0)) == MEM
		  && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					 ? SFmode : DFmode),
					XEXP (XEXP (operand1, 0), 0))))))
    {
1611 if (GET_CODE (operand1) == SUBREG)
1612 operand1 = XEXP (operand1, 0);
1614 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1615 it in WORD_MODE regardless of what mode it was originally given
1617 scratch_reg = force_mode (word_mode, scratch_reg);
1619 /* D might not fit in 14 bits either; for such cases load D into
1621 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1623 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1624 emit_move_insn (scratch_reg,
1625 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1627 XEXP (XEXP (operand1, 0), 0),
1631 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1632 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1633 replace_equiv_address (operand1, scratch_reg)));
1636 else if (scratch_reg
1637 && fp_reg_operand (operand1, mode)
1638 && ((GET_CODE (operand0) == MEM
		    && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					   ? SFmode : DFmode),
					  XEXP (operand0, 0)))
1642 || ((GET_CODE (operand0) == SUBREG)
1643 && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
					 XEXP (XEXP (operand0, 0), 0)))))
    {
1648 if (GET_CODE (operand0) == SUBREG)
1649 operand0 = XEXP (operand0, 0);
1651 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1652 it in WORD_MODE regardless of what mode it was originally given
1654 scratch_reg = force_mode (word_mode, scratch_reg);
1656 /* D might not fit in 14 bits either; for such cases load D into
1658 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1660 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1661 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1664 XEXP (XEXP (operand0, 0),
1669 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1670 emit_insn (gen_rtx_SET (VOIDmode,
1671 replace_equiv_address (operand0, scratch_reg),
1675 /* Handle secondary reloads for loads of FP registers from constant
1676 expressions by forcing the constant into memory.
1678 Use scratch_reg to hold the address of the memory location.
1680 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
1681 NO_REGS when presented with a const_int and a register class
1682 containing only FP registers. Doing so unfortunately creates
1683 more problems than it solves. Fix this for 2.5. */
1684 else if (scratch_reg
1685 && CONSTANT_P (operand1)
1686 && fp_reg_operand (operand0, mode))
1688 rtx const_mem, xoperands[2];
1690 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1691 it in WORD_MODE regardless of what mode it was originally given
1693 scratch_reg = force_mode (word_mode, scratch_reg);
1695 /* Force the constant into memory and put the address of the
1696 memory location into scratch_reg. */
1697 const_mem = force_const_mem (mode, operand1);
1698 xoperands[0] = scratch_reg;
1699 xoperands[1] = XEXP (const_mem, 0);
1700 emit_move_sequence (xoperands, Pmode, 0);
1702 /* Now load the destination register. */
1703 emit_insn (gen_rtx_SET (mode, operand0,
1704 replace_equiv_address (const_mem, scratch_reg)));
1707 /* Handle secondary reloads for SAR. These occur when trying to load
1708 the SAR from memory, FP register, or with a constant. */
1709 else if (scratch_reg
1710 && GET_CODE (operand0) == REG
1711 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1712 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1713 && (GET_CODE (operand1) == MEM
1714 || GET_CODE (operand1) == CONST_INT
1715 || (GET_CODE (operand1) == REG
1716 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1718 /* D might not fit in 14 bits either; for such cases load D into
1720 if (GET_CODE (operand1) == MEM
1721 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1723 /* We are reloading the address into the scratch register, so we
1724 want to make sure the scratch register is a full register. */
1725 scratch_reg = force_mode (word_mode, scratch_reg);
1727 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1728 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1731 XEXP (XEXP (operand1, 0),
1735 /* Now we are going to load the scratch register from memory,
1736 we want to load it in the same width as the original MEM,
1737 which must be the same as the width of the ultimate destination,
1739 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1741 emit_move_insn (scratch_reg,
1742 replace_equiv_address (operand1, scratch_reg));
1746 /* We want to load the scratch register using the same mode as
1747 the ultimate destination. */
1748 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1750 emit_move_insn (scratch_reg, operand1);
1753 /* And emit the insn to set the ultimate destination. We know that
1754 the scratch register has the same mode as the destination at this
1756 emit_move_insn (operand0, scratch_reg);
1759 /* Handle the most common case: storing into a register. */
1760 else if (register_operand (operand0, mode))
1762 if (register_operand (operand1, mode)
1763 || (GET_CODE (operand1) == CONST_INT
1764 && cint_ok_for_move (INTVAL (operand1)))
1765 || (operand1 == CONST0_RTX (mode))
1766 || (GET_CODE (operand1) == HIGH
1767 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1768 /* Only `general_operands' can come here, so MEM is ok. */
1769 || GET_CODE (operand1) == MEM)
1771 /* Various sets are created during RTL generation which don't
1772 have the REG_POINTER flag correctly set. After the CSE pass,
1773 instruction recognition can fail if we don't consistently
1774 set this flag when performing register copies. This should
1775 also improve the opportunities for creating insns that use
1776 unscaled indexing. */
1777 if (REG_P (operand0) && REG_P (operand1))
1779 if (REG_POINTER (operand1)
1780 && !REG_POINTER (operand0)
1781 && !HARD_REGISTER_P (operand0))
1782 copy_reg_pointer (operand0, operand1);
1785 /* When MEMs are broken out, the REG_POINTER flag doesn't
1786 get set. In some cases, we can set the REG_POINTER flag
1787 from the declaration for the MEM. */
1788 if (REG_P (operand0)
1789 && GET_CODE (operand1) == MEM
1790 && !REG_POINTER (operand0))
1792 tree decl = MEM_EXPR (operand1);
1794 /* Set the register pointer flag and register alignment
1795 if the declaration for this memory reference is a
1801 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1803 if (TREE_CODE (decl) == COMPONENT_REF)
1804 decl = TREE_OPERAND (decl, 1);
1806 type = TREE_TYPE (decl);
1807 type = strip_array_types (type);
1809 if (POINTER_TYPE_P (type))
1813 type = TREE_TYPE (type);
1814 /* Using TYPE_ALIGN_OK is rather conservative as
1815 only the ada frontend actually sets it. */
1816 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1818 mark_reg_pointer (operand0, align);
1823 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1827 else if (GET_CODE (operand0) == MEM)
1829 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1830 && !(reload_in_progress || reload_completed))
1832 rtx temp = gen_reg_rtx (DFmode);
1834 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1835 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1838 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1840 /* Run this case quickly. */
1841 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1844 if (! (reload_in_progress || reload_completed))
1846 operands[0] = validize_mem (operand0);
1847 operands[1] = operand1 = force_reg (mode, operand1);
1851 /* Simplify the source if we need to.
1852 Note we do have to handle function labels here, even though we do
1853 not consider them legitimate constants. Loop optimizations can
1854 call the emit_move_xxx with one as a source. */
1855 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1856 || function_label_operand (operand1, mode)
1857 || (GET_CODE (operand1) == HIGH
1858 && symbolic_operand (XEXP (operand1, 0), mode)))
1862 if (GET_CODE (operand1) == HIGH)
1865 operand1 = XEXP (operand1, 0);
1867 if (symbolic_operand (operand1, mode))
1869 /* Argh. The assembler and linker can't handle arithmetic
1872 So we force the plabel into memory, load operand0 from
1873 the memory location, then add in the constant part. */
1874 if ((GET_CODE (operand1) == CONST
1875 && GET_CODE (XEXP (operand1, 0)) == PLUS
1876 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1877 || function_label_operand (operand1, mode))
1879 rtx temp, const_part;
1881 /* Figure out what (if any) scratch register to use. */
1882 if (reload_in_progress || reload_completed)
1884 scratch_reg = scratch_reg ? scratch_reg : operand0;
1885 /* SCRATCH_REG will hold an address and maybe the actual
1886 data. We want it in WORD_MODE regardless of what mode it
1887 was originally given to us. */
1888 scratch_reg = force_mode (word_mode, scratch_reg);
1891 scratch_reg = gen_reg_rtx (Pmode);
1893 if (GET_CODE (operand1) == CONST)
1895 /* Save away the constant part of the expression. */
1896 const_part = XEXP (XEXP (operand1, 0), 1);
1897 gcc_assert (GET_CODE (const_part) == CONST_INT);
1899 /* Force the function label into memory. */
1900 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1904 /* No constant part. */
1905 const_part = NULL_RTX;
1907 /* Force the function label into memory. */
1908 temp = force_const_mem (mode, operand1);
1912 /* Get the address of the memory location. PIC-ify it if
1914 temp = XEXP (temp, 0);
1916 temp = legitimize_pic_address (temp, mode, scratch_reg);
1918 /* Put the address of the memory location into our destination
1921 emit_move_sequence (operands, mode, scratch_reg);
1923 /* Now load from the memory location into our destination
1925 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1926 emit_move_sequence (operands, mode, scratch_reg);
1928 /* And add back in the constant part. */
1929 if (const_part != NULL_RTX)
1930 expand_inc (operand0, const_part);
1939 if (reload_in_progress || reload_completed)
1941 temp = scratch_reg ? scratch_reg : operand0;
1942 /* TEMP will hold an address and maybe the actual
1943 data. We want it in WORD_MODE regardless of what mode it
1944 was originally given to us. */
1945 temp = force_mode (word_mode, temp);
1948 temp = gen_reg_rtx (Pmode);
1950 /* (const (plus (symbol) (const_int))) must be forced to
1951 memory during/after reload if the const_int will not fit
1953 if (GET_CODE (operand1) == CONST
1954 && GET_CODE (XEXP (operand1, 0)) == PLUS
1955 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1956 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1957 && (reload_completed || reload_in_progress)
1960 rtx const_mem = force_const_mem (mode, operand1);
1961 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1963 operands[1] = replace_equiv_address (const_mem, operands[1]);
1964 emit_move_sequence (operands, mode, temp);
1968 operands[1] = legitimize_pic_address (operand1, mode, temp);
1969 if (REG_P (operand0) && REG_P (operands[1]))
1970 copy_reg_pointer (operand0, operands[1]);
1971 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1974 /* On the HPPA, references to data space are supposed to use dp,
1975 register 27, but showing it in the RTL inhibits various cse
1976 and loop optimizations. */
1981 if (reload_in_progress || reload_completed)
1983 temp = scratch_reg ? scratch_reg : operand0;
1984 /* TEMP will hold an address and maybe the actual
1985 data. We want it in WORD_MODE regardless of what mode it
1986 was originally given to us. */
1987 temp = force_mode (word_mode, temp);
1990 temp = gen_reg_rtx (mode);
1992 /* Loading a SYMBOL_REF into a register makes that register
1993 safe to be used as the base in an indexed address.
1995 Don't mark hard registers though. That loses. */
1996 if (GET_CODE (operand0) == REG
1997 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1998 mark_reg_pointer (operand0, BITS_PER_UNIT);
1999 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2000 mark_reg_pointer (temp, BITS_PER_UNIT);
2003 set = gen_rtx_SET (mode, operand0, temp);
2005 set = gen_rtx_SET (VOIDmode,
2007 gen_rtx_LO_SUM (mode, temp, operand1));
2009 emit_insn (gen_rtx_SET (VOIDmode,
2011 gen_rtx_HIGH (mode, operand1)));
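/* For a plain (non-PIC, non-TLS) symbol this produces the usual
   two-instruction PA idiom: the HIGH set loads the left part of the
   symbol's address (ldil) and the LO_SUM adds in the right part
   (ldo), leaving the complete address in the destination.  */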
2017 else if (pa_tls_referenced_p (operand1))
2022 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2024 addend = XEXP (XEXP (tmp, 0), 1);
2025 tmp = XEXP (XEXP (tmp, 0), 0);
2028 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2029 tmp = legitimize_tls_address (tmp);
2032 tmp = gen_rtx_PLUS (mode, tmp, addend);
2033 tmp = force_operand (tmp, operands[0]);
2037 else if (GET_CODE (operand1) != CONST_INT
2038 || !cint_ok_for_move (INTVAL (operand1)))
2042 HOST_WIDE_INT value = 0;
2043 HOST_WIDE_INT insv = 0;
2046 if (GET_CODE (operand1) == CONST_INT)
2047 value = INTVAL (operand1);
2050 && GET_CODE (operand1) == CONST_INT
2051 && HOST_BITS_PER_WIDE_INT > 32
2052 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2056 /* Extract the low order 32 bits of the value and sign extend.
2057 If the new value is the same as the original value, we can
2058 use the original value as-is.  If the new value is
2059 different, we use it and insert the most significant 32 bits
2060 of the original value into the final result. */
2061 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2062 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2065 #if HOST_BITS_PER_WIDE_INT > 32
2066 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2070 operand1 = GEN_INT (nval);
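/* Worked example of the split above: for the 64-bit constant
   0x1ffff0000 the masked low half is 0xffff0000; XORing with 1 << 31
   and subtracting 1 << 31 sign extends it to -65536, which differs
   from the original value, so NVAL becomes -65536 and INSV receives
   the upper 32 bits (here 1) for the later insertion step.  */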
2074 if (reload_in_progress || reload_completed)
2075 temp = scratch_reg ? scratch_reg : operand0;
2077 temp = gen_reg_rtx (mode);
2079 /* We don't directly split DImode constants on 32-bit targets
2080 because PLUS uses an 11-bit immediate and the insn sequence
2081 generated is not as efficient as the one using HIGH/LO_SUM. */
2082 if (GET_CODE (operand1) == CONST_INT
2083 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2084 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2087 /* Directly break constant into high and low parts. This
2088 provides better optimization opportunities because various
2089 passes recognize constants split with PLUS but not LO_SUM.
2090 We use a 14-bit signed low part except when the addition
2091 of 0x4000 to the high part might change the sign of the result.  */
2093 HOST_WIDE_INT low = value & 0x3fff;
2094 HOST_WIDE_INT high = value & ~ 0x3fff;
2098 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2106 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2107 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2111 emit_insn (gen_rtx_SET (VOIDmode, temp,
2112 gen_rtx_HIGH (mode, operand1)));
2113 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
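/* Example of the two forms above: for the SImode constant 0x41234 the
   PLUS split gives high = 0x40000 and low = 0x1234 (assuming no sign
   adjustment is needed, since the low part already fits a positive
   14-bit value), so TEMP is set to 0x40000 and the source becomes
   (plus TEMP 0x1234).  Constants that do not qualify for the PLUS
   split take the HIGH/LO_SUM form instead.  */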
2116 insn = emit_move_insn (operands[0], operands[1]);
2118 /* Now insert the most significant 32 bits of the value
2119 into the register. When we don't have a second register
2120 available, it could take up to nine instructions to load
2121 a 64-bit integer constant. Prior to reload, we force
2122 constants that would take more than three instructions
2123 to load to the constant pool. During and after reload,
2124 we have to handle all possible values. */
2127 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2128 register and the value to be inserted is outside the
2129 range that can be loaded with three depdi instructions. */
2130 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2132 operand1 = GEN_INT (insv);
2134 emit_insn (gen_rtx_SET (VOIDmode, temp,
2135 gen_rtx_HIGH (mode, operand1)));
2136 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2137 emit_insn (gen_insv (operand0, GEN_INT (32),
2142 int len = 5, pos = 27;
2144 /* Insert the bits using the depdi instruction. */
2147 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2148 HOST_WIDE_INT sign = v5 < 0;
2150 /* Left extend the insertion. */
2151 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2152 while (pos > 0 && (insv & 1) == sign)
2154 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2159 emit_insn (gen_insv (operand0, GEN_INT (len),
2160 GEN_INT (pos), GEN_INT (v5)));
2162 len = pos > 0 && pos < 5 ? pos : 5;
2168 set_unique_reg_note (insn, REG_EQUAL, op1);
2173 /* Now have insn-emit do whatever it normally does. */
2177 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2178 it will need a link/runtime reloc). */
2181 reloc_needed (tree exp)
2185 switch (TREE_CODE (exp))
2190 case POINTER_PLUS_EXPR:
2193 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2194 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2198 case NON_LVALUE_EXPR:
2199 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2205 unsigned HOST_WIDE_INT ix;
2207 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2209 reloc |= reloc_needed (value);
2222 /* Does operand (which is a symbolic_operand) live in text space?
2223 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info, will be true.  */
2227 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2229 if (GET_CODE (operand) == CONST)
2230 operand = XEXP (XEXP (operand, 0), 0);
2233 if (GET_CODE (operand) == SYMBOL_REF)
2234 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2238 if (GET_CODE (operand) == SYMBOL_REF)
2239 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2245 /* Return the best assembler insn template
2246 for moving operands[1] into operands[0] as a fullword. */
2248 singlemove_string (rtx *operands)
2250 HOST_WIDE_INT intval;
2252 if (GET_CODE (operands[0]) == MEM)
2253 return "stw %r1,%0";
2254 if (GET_CODE (operands[1]) == MEM)
2256 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2261 gcc_assert (GET_MODE (operands[1]) == SFmode);
2263 /* Translate the CONST_DOUBLE to a CONST_INT with the same target bit pattern.  */
2265 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2266 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2268 operands[1] = GEN_INT (i);
2269 /* Fall through to CONST_INT case. */
2271 if (GET_CODE (operands[1]) == CONST_INT)
2273 intval = INTVAL (operands[1]);
2275 if (VAL_14_BITS_P (intval))
2277 else if ((intval & 0x7ff) == 0)
2278 return "ldil L'%1,%0";
2279 else if (zdepi_cint_p (intval))
2280 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2282 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2284 return "copy %1,%0";
2288 /* Compute position (in OP[1]) and width (in OP[2])
2289 useful for copying IMM to a register using the zdepi
2290 instructions. Store the immediate value to insert in OP[0]. */
2292 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2296 /* Find the least significant set bit in IMM. */
2297 for (lsb = 0; lsb < 32; lsb++)
2304 /* Choose variants based on *sign* of the 5-bit field. */
2305 if ((imm & 0x10) == 0)
2306 len = (lsb <= 28) ? 4 : 32 - lsb;
2309 /* Find the width of the bitstring in IMM. */
2310 for (len = 5; len < 32 - lsb; len++)
2312 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2316 /* Sign extend IMM as a 5-bit value. */
2317 imm = (imm & 0xf) - 0x10;
2325 /* Compute position (in OP[1]) and width (in OP[2])
2326 useful for copying IMM to a register using the depdi,z
2327 instructions. Store the immediate value to insert in OP[0]. */
2329 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2331 int lsb, len, maxlen;
2333 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2335 /* Find the least significant set bit in IMM. */
2336 for (lsb = 0; lsb < maxlen; lsb++)
2343 /* Choose variants based on *sign* of the 5-bit field. */
2344 if ((imm & 0x10) == 0)
2345 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2348 /* Find the width of the bitstring in IMM. */
2349 for (len = 5; len < maxlen - lsb; len++)
2351 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2355 /* Extend length if host is narrow and IMM is negative. */
2356 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2359 /* Sign extend IMM as a 5-bit value. */
2360 imm = (imm & 0xf) - 0x10;
2368 /* Output assembler code to perform a doubleword move insn
2369 with operands OPERANDS. */
2372 output_move_double (rtx *operands)
2374 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2376 rtx addreg0 = 0, addreg1 = 0;
2378 /* First classify both operands. */
2380 if (REG_P (operands[0]))
2382 else if (offsettable_memref_p (operands[0]))
2384 else if (GET_CODE (operands[0]) == MEM)
2389 if (REG_P (operands[1]))
2391 else if (CONSTANT_P (operands[1]))
2393 else if (offsettable_memref_p (operands[1]))
2395 else if (GET_CODE (operands[1]) == MEM)
2400 /* Check for the cases that the operand constraints are not
2401 supposed to allow to happen. */
2402 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2404 /* Handle copies between general and floating registers. */
2406 if (optype0 == REGOP && optype1 == REGOP
2407 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2409 if (FP_REG_P (operands[0]))
2411 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2412 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2413 return "{fldds|fldd} -16(%%sp),%0";
2417 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2418 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2419 return "{ldws|ldw} -12(%%sp),%R0";
2423 /* Handle auto decrementing and incrementing loads and stores
2424 specifically, since the structure of the function doesn't work
2425 for them without major modification.  Do it better when we teach
2426 this port about the general inc/dec addressing of the PA.
2427 (This was written by tege. Chide him if it doesn't work.) */
2429 if (optype0 == MEMOP)
2431 /* We have to output the address syntax ourselves, since print_operand
2432 doesn't deal with the addresses we want to use. Fix this later. */
2434 rtx addr = XEXP (operands[0], 0);
2435 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2437 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2439 operands[0] = XEXP (addr, 0);
2440 gcc_assert (GET_CODE (operands[1]) == REG
2441 && GET_CODE (operands[0]) == REG);
2443 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2445 /* No overlap between high target register and address
2446 register. (We do this in a non-obvious way to
2447 save a register file writeback) */
2448 if (GET_CODE (addr) == POST_INC)
2449 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2450 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2452 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2454 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2456 operands[0] = XEXP (addr, 0);
2457 gcc_assert (GET_CODE (operands[1]) == REG
2458 && GET_CODE (operands[0]) == REG);
2460 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2461 /* No overlap between high target register and address
2462 register. (We do this in a non-obvious way to save a
2463 register file writeback) */
2464 if (GET_CODE (addr) == PRE_INC)
2465 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2466 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2469 if (optype1 == MEMOP)
2471 /* We have to output the address syntax ourselves, since print_operand
2472 doesn't deal with the addresses we want to use. Fix this later. */
2474 rtx addr = XEXP (operands[1], 0);
2475 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2477 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2479 operands[1] = XEXP (addr, 0);
2480 gcc_assert (GET_CODE (operands[0]) == REG
2481 && GET_CODE (operands[1]) == REG);
2483 if (!reg_overlap_mentioned_p (high_reg, addr))
2485 /* No overlap between high target register and address
2486 register. (We do this in a non-obvious way to
2487 save a register file writeback) */
2488 if (GET_CODE (addr) == POST_INC)
2489 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2490 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2494 /* This is an undefined situation. We should load into the
2495 address register *and* update that register. Probably
2496 we don't need to handle this at all. */
2497 if (GET_CODE (addr) == POST_INC)
2498 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2499 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2502 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2504 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2506 operands[1] = XEXP (addr, 0);
2507 gcc_assert (GET_CODE (operands[0]) == REG
2508 && GET_CODE (operands[1]) == REG);
2510 if (!reg_overlap_mentioned_p (high_reg, addr))
2512 /* No overlap between high target register and address
2513 register. (We do this in a non-obvious way to
2514 save a register file writeback) */
2515 if (GET_CODE (addr) == PRE_INC)
2516 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2517 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2521 /* This is an undefined situation. We should load into the
2522 address register *and* update that register. Probably
2523 we don't need to handle this at all. */
2524 if (GET_CODE (addr) == PRE_INC)
2525 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2526 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2529 else if (GET_CODE (addr) == PLUS
2530 && GET_CODE (XEXP (addr, 0)) == MULT)
2533 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2535 if (!reg_overlap_mentioned_p (high_reg, addr))
2537 xoperands[0] = high_reg;
2538 xoperands[1] = XEXP (addr, 1);
2539 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2540 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2541 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2543 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2547 xoperands[0] = high_reg;
2548 xoperands[1] = XEXP (addr, 1);
2549 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2550 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2551 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2553 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2558 /* If an operand is an unoffsettable memory ref, find a register
2559 we can increment temporarily to make it refer to the second word. */
2561 if (optype0 == MEMOP)
2562 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2564 if (optype1 == MEMOP)
2565 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2567 /* Ok, we can do one word at a time.
2568 Normally we do the low-numbered word first.
2570 In either case, set up in LATEHALF the operands to use
2571 for the high-numbered word and in some cases alter the
2572 operands in OPERANDS to be suitable for the low-numbered word. */
2574 if (optype0 == REGOP)
2575 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2576 else if (optype0 == OFFSOP)
2577 latehalf[0] = adjust_address (operands[0], SImode, 4);
2579 latehalf[0] = operands[0];
2581 if (optype1 == REGOP)
2582 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2583 else if (optype1 == OFFSOP)
2584 latehalf[1] = adjust_address (operands[1], SImode, 4);
2585 else if (optype1 == CNSTOP)
2586 split_double (operands[1], &operands[1], &latehalf[1]);
2588 latehalf[1] = operands[1];
2590 /* If the first move would clobber the source of the second one,
2591 do them in the other order.
2593 This can happen in two cases:
2595 mem -> register where the first half of the destination register
2596 is the same register used in the memory's address. Reload
2597 can create such insns.
2599 mem in this case will be either register indirect or register
2600 indirect plus a valid offset.
2602 register -> register move where REGNO(dst) == REGNO(src + 1)
2603 someone (Tim/Tege?) claimed this can happen for parameter loads.
2605 Handle mem -> register case first. */
2606 if (optype0 == REGOP
2607 && (optype1 == MEMOP || optype1 == OFFSOP)
2608 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2611 /* Do the late half first. */
2613 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2614 output_asm_insn (singlemove_string (latehalf), latehalf);
2618 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2619 return singlemove_string (operands);
2622 /* Now handle register -> register case. */
2623 if (optype0 == REGOP && optype1 == REGOP
2624 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2626 output_asm_insn (singlemove_string (latehalf), latehalf);
2627 return singlemove_string (operands);
2630 /* Normal case: do the two words, low-numbered first. */
2632 output_asm_insn (singlemove_string (operands), operands);
2634 /* Make any unoffsettable addresses point at high-numbered word. */
2636 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2638 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2641 output_asm_insn (singlemove_string (latehalf), latehalf);
2643 /* Undo the adds we just did. */
2645 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2647 output_asm_insn ("ldo -4(%0),%0", &addreg1);
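/* For instance, copying a DImode value held in %r4/%r5 into %r6/%r7
   is emitted as copy %r4,%r6 followed by copy %r5,%r7, whereas
   copying %r5/%r6 into %r6/%r7 would clobber %r6 before it was read,
   so the ordering check above emits the %r6 -> %r7 copy first.  */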
2653 output_fp_move_double (rtx *operands)
2655 if (FP_REG_P (operands[0]))
2657 if (FP_REG_P (operands[1])
2658 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2659 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2661 output_asm_insn ("fldd%F1 %1,%0", operands);
2663 else if (FP_REG_P (operands[1]))
2665 output_asm_insn ("fstd%F0 %1,%0", operands);
2671 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2673 /* This is a pain. You have to be prepared to deal with an
2674 arbitrary address here including pre/post increment/decrement.
2676 so avoid this in the MD. */
2677 gcc_assert (GET_CODE (operands[0]) == REG);
2679 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2680 xoperands[0] = operands[0];
2681 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2686 /* Return a REG that occurs in ADDR with coefficient 1.
2687 ADDR can be effectively incremented by incrementing REG. */
2690 find_addr_reg (rtx addr)
2692 while (GET_CODE (addr) == PLUS)
2694 if (GET_CODE (XEXP (addr, 0)) == REG)
2695 addr = XEXP (addr, 0);
2696 else if (GET_CODE (XEXP (addr, 1)) == REG)
2697 addr = XEXP (addr, 1);
2698 else if (CONSTANT_P (XEXP (addr, 0)))
2699 addr = XEXP (addr, 1);
2700 else if (CONSTANT_P (XEXP (addr, 1)))
2701 addr = XEXP (addr, 0);
2705 gcc_assert (GET_CODE (addr) == REG);
2709 /* Emit code to perform a block move.
2711 OPERANDS[0] is the destination pointer as a REG, clobbered.
2712 OPERANDS[1] is the source pointer as a REG, clobbered.
2713 OPERANDS[2] is a register for temporary storage.
2714 OPERANDS[3] is a register for temporary storage.
2715 OPERANDS[4] is the size as a CONST_INT
2716 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2717 OPERANDS[6] is another temporary register. */
2720 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2722 int align = INTVAL (operands[5]);
2723 unsigned long n_bytes = INTVAL (operands[4]);
2725 /* We can't move more than a word at a time because the PA
2726 has no integer move insns longer than a word.  (Could use fp mem ops?) */
2727 if (align > (TARGET_64BIT ? 8 : 4))
2728 align = (TARGET_64BIT ? 8 : 4);
2730 /* Note that we know each loop below will execute at least twice
2731 (else we would have open-coded the copy). */
2735 /* Pre-adjust the loop counter. */
2736 operands[4] = GEN_INT (n_bytes - 16);
2737 output_asm_insn ("ldi %4,%2", operands);
2740 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2741 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2742 output_asm_insn ("std,ma %3,8(%0)", operands);
2743 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2744 output_asm_insn ("std,ma %6,8(%0)", operands);
2746 /* Handle the residual. There could be up to 7 bytes of
2747 residual to copy! */
2748 if (n_bytes % 16 != 0)
2750 operands[4] = GEN_INT (n_bytes % 8);
2751 if (n_bytes % 16 >= 8)
2752 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2753 if (n_bytes % 8 != 0)
2754 output_asm_insn ("ldd 0(%1),%6", operands);
2755 if (n_bytes % 16 >= 8)
2756 output_asm_insn ("std,ma %3,8(%0)", operands);
2757 if (n_bytes % 8 != 0)
2758 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2763 /* Pre-adjust the loop counter. */
2764 operands[4] = GEN_INT (n_bytes - 8);
2765 output_asm_insn ("ldi %4,%2", operands);
2768 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2769 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2770 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2771 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2772 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2774 /* Handle the residual. There could be up to 7 bytes of
2775 residual to copy! */
2776 if (n_bytes % 8 != 0)
2778 operands[4] = GEN_INT (n_bytes % 4);
2779 if (n_bytes % 8 >= 4)
2780 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2781 if (n_bytes % 4 != 0)
2782 output_asm_insn ("ldw 0(%1),%6", operands);
2783 if (n_bytes % 8 >= 4)
2784 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2785 if (n_bytes % 4 != 0)
2786 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2791 /* Pre-adjust the loop counter. */
2792 operands[4] = GEN_INT (n_bytes - 4);
2793 output_asm_insn ("ldi %4,%2", operands);
2796 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2797 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2798 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2799 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2800 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2802 /* Handle the residual. */
2803 if (n_bytes % 4 != 0)
2805 if (n_bytes % 4 >= 2)
2806 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2807 if (n_bytes % 2 != 0)
2808 output_asm_insn ("ldb 0(%1),%6", operands);
2809 if (n_bytes % 4 >= 2)
2810 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2811 if (n_bytes % 2 != 0)
2812 output_asm_insn ("stb %6,0(%0)", operands);
2817 /* Pre-adjust the loop counter. */
2818 operands[4] = GEN_INT (n_bytes - 2);
2819 output_asm_insn ("ldi %4,%2", operands);
2822 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2823 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2824 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2825 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2826 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2828 /* Handle the residual. */
2829 if (n_bytes % 2 != 0)
2831 output_asm_insn ("ldb 0(%1),%3", operands);
2832 output_asm_insn ("stb %3,0(%0)", operands);
2841 /* Count the number of insns necessary to handle this block move.
2843 Basic structure is the same as output_block_move, except that we
2844 count insns rather than emit them. */
2847 compute_movmem_length (rtx insn)
2849 rtx pat = PATTERN (insn);
2850 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2851 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2852 unsigned int n_insns = 0;
2854 /* We can't move more than a word at a time because the PA
2855 has no integer move insns longer than a word.  (Could use fp mem ops?) */
2856 if (align > (TARGET_64BIT ? 8 : 4))
2857 align = (TARGET_64BIT ? 8 : 4);
2859 /* The basic copying loop. */
2863 if (n_bytes % (2 * align) != 0)
2865 if ((n_bytes % (2 * align)) >= align)
2868 if ((n_bytes % align) != 0)
2872 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2876 /* Emit code to perform a block clear.
2878 OPERANDS[0] is the destination pointer as a REG, clobbered.
2879 OPERANDS[1] is a register for temporary storage.
2880 OPERANDS[2] is the size as a CONST_INT
2881 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2884 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2886 int align = INTVAL (operands[3]);
2887 unsigned long n_bytes = INTVAL (operands[2]);
2889 /* We can't clear more than a word at a time because the PA
2890 has no integer move insns longer than a word.  */
2891 if (align > (TARGET_64BIT ? 8 : 4))
2892 align = (TARGET_64BIT ? 8 : 4);
2894 /* Note that we know each loop below will execute at least twice
2895 (else we would have open-coded the copy). */
2899 /* Pre-adjust the loop counter. */
2900 operands[2] = GEN_INT (n_bytes - 16);
2901 output_asm_insn ("ldi %2,%1", operands);
2904 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2905 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2906 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2908 /* Handle the residual. There could be up to 7 bytes of
2909 residual to copy! */
2910 if (n_bytes % 16 != 0)
2912 operands[2] = GEN_INT (n_bytes % 8);
2913 if (n_bytes % 16 >= 8)
2914 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2915 if (n_bytes % 8 != 0)
2916 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2921 /* Pre-adjust the loop counter. */
2922 operands[2] = GEN_INT (n_bytes - 8);
2923 output_asm_insn ("ldi %2,%1", operands);
2926 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2927 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2928 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2930 /* Handle the residual. There could be up to 7 bytes of
2931 residual to copy! */
2932 if (n_bytes % 8 != 0)
2934 operands[2] = GEN_INT (n_bytes % 4);
2935 if (n_bytes % 8 >= 4)
2936 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2937 if (n_bytes % 4 != 0)
2938 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2943 /* Pre-adjust the loop counter. */
2944 operands[2] = GEN_INT (n_bytes - 4);
2945 output_asm_insn ("ldi %2,%1", operands);
2948 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2949 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2950 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2952 /* Handle the residual. */
2953 if (n_bytes % 4 != 0)
2955 if (n_bytes % 4 >= 2)
2956 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2957 if (n_bytes % 2 != 0)
2958 output_asm_insn ("stb %%r0,0(%0)", operands);
2963 /* Pre-adjust the loop counter. */
2964 operands[2] = GEN_INT (n_bytes - 2);
2965 output_asm_insn ("ldi %2,%1", operands);
2968 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2969 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2970 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2972 /* Handle the residual. */
2973 if (n_bytes % 2 != 0)
2974 output_asm_insn ("stb %%r0,0(%0)", operands);
2983 /* Count the number of insns necessary to handle this block clear.
2985 Basic structure is the same as output_block_clear, except that we
2986 count insns rather than emit them. */
2989 compute_clrmem_length (rtx insn)
2991 rtx pat = PATTERN (insn);
2992 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2993 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2994 unsigned int n_insns = 0;
2996 /* We can't clear more than a word at a time because the PA
2997 has no integer move insns longer than a word.  */
2998 if (align > (TARGET_64BIT ? 8 : 4))
2999 align = (TARGET_64BIT ? 8 : 4);
3001 /* The basic loop. */
3005 if (n_bytes % (2 * align) != 0)
3007 if ((n_bytes % (2 * align)) >= align)
3010 if ((n_bytes % align) != 0)
3014 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3020 output_and (rtx *operands)
3022 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3024 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3025 int ls0, ls1, ms0, p, len;
3027 for (ls0 = 0; ls0 < 32; ls0++)
3028 if ((mask & (1 << ls0)) == 0)
3031 for (ls1 = ls0; ls1 < 32; ls1++)
3032 if ((mask & (1 << ls1)) != 0)
3035 for (ms0 = ls1; ms0 < 32; ms0++)
3036 if ((mask & (1 << ms0)) == 0)
3039 gcc_assert (ms0 == 32);
3047 operands[2] = GEN_INT (len);
3048 return "{extru|extrw,u} %1,31,%2,%0";
3052 /* We could use this `depi' for the case above as well, but `depi'
3053 requires one more register file access than an `extru'. */
3058 operands[2] = GEN_INT (p);
3059 operands[3] = GEN_INT (len);
3060 return "{depi|depwi} 0,%2,%3,%0";
3064 return "and %1,%2,%0";
3067 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3068 storing the result in operands[0]. */
3070 output_64bit_and (rtx *operands)
3072 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3074 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3075 int ls0, ls1, ms0, p, len;
3077 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3078 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3081 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3082 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3085 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3086 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3089 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3091 if (ls1 == HOST_BITS_PER_WIDE_INT)
3097 operands[2] = GEN_INT (len);
3098 return "extrd,u %1,63,%2,%0";
3102 /* We could use this `depi' for the case above as well, but `depi'
3103 requires one more register file access than an `extru'. */
3108 operands[2] = GEN_INT (p);
3109 operands[3] = GEN_INT (len);
3110 return "depdi 0,%2,%3,%0";
3114 return "and %1,%2,%0";
3118 output_ior (rtx *operands)
3120 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3121 int bs0, bs1, p, len;
3123 if (INTVAL (operands[2]) == 0)
3124 return "copy %1,%0";
3126 for (bs0 = 0; bs0 < 32; bs0++)
3127 if ((mask & (1 << bs0)) != 0)
3130 for (bs1 = bs0; bs1 < 32; bs1++)
3131 if ((mask & (1 << bs1)) == 0)
3134 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3139 operands[2] = GEN_INT (p);
3140 operands[3] = GEN_INT (len);
3141 return "{depi|depwi} -1,%2,%3,%0";
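/* Example: an IOR with the contiguous mask 0x00ff0000 becomes a
   single depwi of -1 over that eight-bit field, while an IOR with
   zero is just a copy; the assert above documents that only such
   contiguous masks reach this point.  */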
3144 /* Return a string to perform a bitwise inclusive-or of operands[1] with operands[2]
3145 storing the result in operands[0]. */
3147 output_64bit_ior (rtx *operands)
3149 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3150 int bs0, bs1, p, len;
3152 if (INTVAL (operands[2]) == 0)
3153 return "copy %1,%0";
3155 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3156 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3159 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3160 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3163 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3164 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3169 operands[2] = GEN_INT (p);
3170 operands[3] = GEN_INT (len);
3171 return "depdi -1,%2,%3,%0";
3174 /* Target hook for assembling integer objects. This code handles
3175 aligned SI and DI integers specially since function references
3176 must be preceded by P%. */
3179 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3181 if (size == UNITS_PER_WORD
3183 && function_label_operand (x, VOIDmode))
3185 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3186 output_addr_const (asm_out_file, x);
3187 fputc ('\n', asm_out_file);
3190 return default_assemble_integer (x, size, aligned_p);
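/* For example, an aligned word-sized reference to a function foo (an
   illustrative name) comes out as "\t.word\tP%foo" (".dword" when
   SIZE is 8), the P% prefix requesting the function's plabel rather
   than a plain code address; everything else falls back to the
   default handler.  */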
3193 /* Output an ascii string. */
3195 output_ascii (FILE *file, const char *p, int size)
3199 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3201 /* The HP assembler can only take strings of 256 characters at one
3202 time. This is a limitation on input line length, *not* the
3203 length of the string. Sigh. Even worse, it seems that the
3204 restriction is in number of input characters (see \xnn &
3205 \whatever). So we have to do this very carefully. */
3207 fputs ("\t.STRING \"", file);
3210 for (i = 0; i < size; i += 4)
3214 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3216 register unsigned int c = (unsigned char) p[i + io];
3218 if (c == '\"' || c == '\\')
3219 partial_output[co++] = '\\';
3220 if (c >= ' ' && c < 0177)
3221 partial_output[co++] = c;
3225 partial_output[co++] = '\\';
3226 partial_output[co++] = 'x';
3227 hexd = c / 16 - 0 + '0';
3229 hexd -= '9' - 'a' + 1;
3230 partial_output[co++] = hexd;
3231 hexd = c % 16 - 0 + '0';
3233 hexd -= '9' - 'a' + 1;
3234 partial_output[co++] = hexd;
3237 if (chars_output + co > 243)
3239 fputs ("\"\n\t.STRING \"", file);
3242 fwrite (partial_output, 1, (size_t) co, file);
3246 fputs ("\"\n", file);
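/* Escaping example: the three input bytes 'a', '"', 0x07 are written
   as a\"\x07, and a new "\t.STRING" directive is started once roughly
   243 output characters have accumulated so the assembler's 256
   character input line limit is never exceeded.  */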
3249 /* Try to rewrite floating point comparisons & branches to avoid
3250 useless add,tr insns.
3252 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3253 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3254 first attempt to remove useless add,tr insns. It is zero
3255 for the second pass as reorg sometimes leaves bogus REG_DEAD notes.
3258 When CHECK_NOTES is zero we can only eliminate add,tr insns
3259 when there's a 1:1 correspondence between fcmp and ftest/fbranch insns.  */
3262 remove_useless_addtr_insns (int check_notes)
3265 static int pass = 0;
3267 /* This is fairly cheap, so always run it when optimizing. */
3271 int fbranch_count = 0;
3273 /* Walk all the insns in this function looking for fcmp & fbranch
3274 instructions. Keep track of how many of each we find. */
3275 for (insn = get_insns (); insn; insn = next_insn (insn))
3279 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3280 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3283 tmp = PATTERN (insn);
3285 /* It must be a set. */
3286 if (GET_CODE (tmp) != SET)
3289 /* If the destination is CCFP, then we've found an fcmp insn. */
3290 tmp = SET_DEST (tmp);
3291 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3297 tmp = PATTERN (insn);
3298 /* If this is an fbranch instruction, bump the fbranch counter. */
3299 if (GET_CODE (tmp) == SET
3300 && SET_DEST (tmp) == pc_rtx
3301 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3302 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3303 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3304 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3312 /* Find all floating point compare + branch insns. If possible,
3313 reverse the comparison & the branch to avoid add,tr insns. */
3314 for (insn = get_insns (); insn; insn = next_insn (insn))
3318 /* Ignore anything that isn't an INSN. */
3319 if (GET_CODE (insn) != INSN)
3322 tmp = PATTERN (insn);
3324 /* It must be a set. */
3325 if (GET_CODE (tmp) != SET)
3328 /* The destination must be CCFP, which is register zero. */
3329 tmp = SET_DEST (tmp);
3330 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3333 /* INSN should be a set of CCFP.
3335 See if the result of this insn is used in a reversed FP
3336 conditional branch. If so, reverse our condition and
3337 the branch. Doing so avoids useless add,tr insns. */
3338 next = next_insn (insn);
3341 /* Jumps, calls and labels stop our search. */
3342 if (GET_CODE (next) == JUMP_INSN
3343 || GET_CODE (next) == CALL_INSN
3344 || GET_CODE (next) == CODE_LABEL)
3347 /* As does another fcmp insn. */
3348 if (GET_CODE (next) == INSN
3349 && GET_CODE (PATTERN (next)) == SET
3350 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3351 && REGNO (SET_DEST (PATTERN (next))) == 0)
3354 next = next_insn (next);
3357 /* Is NEXT_INSN a branch? */
3359 && GET_CODE (next) == JUMP_INSN)
3361 rtx pattern = PATTERN (next);
3363 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3364 and CCFP dies, then reverse our conditional and the branch
3365 to avoid the add,tr. */
3366 if (GET_CODE (pattern) == SET
3367 && SET_DEST (pattern) == pc_rtx
3368 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3369 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3370 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3371 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3372 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3373 && (fcmp_count == fbranch_count
3375 && find_regno_note (next, REG_DEAD, 0))))
3377 /* Reverse the branch. */
3378 tmp = XEXP (SET_SRC (pattern), 1);
3379 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3380 XEXP (SET_SRC (pattern), 2) = tmp;
3381 INSN_CODE (next) = -1;
3383 /* Reverse our condition. */
3384 tmp = PATTERN (insn);
3385 PUT_CODE (XEXP (tmp, 1),
3386 (reverse_condition_maybe_unordered
3387 (GET_CODE (XEXP (tmp, 1)))));
3397 /* You may have trouble believing this, but this is the 32 bit HP-PA stack layout.
3402 Variable arguments (optional; any number may be allocated)
3404 SP-(4*(N+9)) arg word N
3409 Fixed arguments (must be allocated; may remain unused)
3418 SP-32 External Data Pointer (DP)
3420 SP-24 External/stub RP (RP')
3424 SP-8 Calling Stub RP (RP'')
3429 SP-0 Stack Pointer (points to next available address)
3433 /* This function saves registers as follows. Registers marked with ' are
3434 this function's registers (as opposed to the previous function's).
3435 If a frame_pointer isn't needed, r4 is saved as a general register;
3436 the space for the frame pointer is still allocated, though, to keep things simple.
3442 SP (FP') Previous FP
3443 SP + 4 Alignment filler (sigh)
3444 SP + 8 Space for locals reserved here.
3448 SP + n All call saved registers used.
3452 SP + o All call saved fp registers used.
3456 SP + p (SP') points to next available address.
3460 /* Global variables set by output_function_prologue(). */
3461 /* Size of frame.  Need to know this to emit return insns from leaf procedures.  */
3463 static HOST_WIDE_INT actual_fsize, local_fsize;
3464 static int save_fregs;
3466 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3467 Handle case where DISP > 8k by using the add_high_const patterns.
3469 Note in DISP > 8k case, we will leave the high part of the address
3470 in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.  */
3473 store_reg (int reg, HOST_WIDE_INT disp, int base)
3475 rtx insn, dest, src, basereg;
3477 src = gen_rtx_REG (word_mode, reg);
3478 basereg = gen_rtx_REG (Pmode, base);
3479 if (VAL_14_BITS_P (disp))
3481 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3482 insn = emit_move_insn (dest, src);
3484 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3486 rtx delta = GEN_INT (disp);
3487 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3489 emit_move_insn (tmpreg, delta);
3490 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3493 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3494 gen_rtx_SET (VOIDmode, tmpreg,
3495 gen_rtx_PLUS (Pmode, basereg, delta)));
3496 RTX_FRAME_RELATED_P (insn) = 1;
3498 dest = gen_rtx_MEM (word_mode, tmpreg);
3499 insn = emit_move_insn (dest, src);
3503 rtx delta = GEN_INT (disp);
3504 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3505 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3507 emit_move_insn (tmpreg, high);
3508 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3509 insn = emit_move_insn (dest, src);
3511 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3512 gen_rtx_SET (VOIDmode,
3513 gen_rtx_MEM (word_mode,
3514 gen_rtx_PLUS (word_mode,
3521 RTX_FRAME_RELATED_P (insn) = 1;
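/* For example, storing a register at sp+9000 cannot use a single
   14-bit displacement (range -8192..8191), so the code above first
   adds the HIGH part of 9000 to the base, leaving the result in %r1,
   and then stores through the LO_SUM of %r1 and the displacement,
   roughly the usual addil/stw pairing on this port.  */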
3524 /* Emit RTL to store REG at the memory location specified by BASE and then
3525 add MOD to BASE. MOD must be <= 8k. */
3528 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3530 rtx insn, basereg, srcreg, delta;
3532 gcc_assert (VAL_14_BITS_P (mod));
3534 basereg = gen_rtx_REG (Pmode, base);
3535 srcreg = gen_rtx_REG (word_mode, reg);
3536 delta = GEN_INT (mod);
3538 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3541 RTX_FRAME_RELATED_P (insn) = 1;
3543 /* RTX_FRAME_RELATED_P must be set on each frame related set
3544 in a parallel with more than one element. */
3545 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3546 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3550 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3551 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3552 whether to add a frame note or not.
3554 In the DISP > 8k case, we leave the high part of the address in %r1.
3555 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3558 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3562 if (VAL_14_BITS_P (disp))
3564 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3565 plus_constant (gen_rtx_REG (Pmode, base), disp));
3567 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3569 rtx basereg = gen_rtx_REG (Pmode, base);
3570 rtx delta = GEN_INT (disp);
3571 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3573 emit_move_insn (tmpreg, delta);
3574 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3575 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3577 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3578 gen_rtx_SET (VOIDmode, tmpreg,
3579 gen_rtx_PLUS (Pmode, basereg, delta)));
3583 rtx basereg = gen_rtx_REG (Pmode, base);
3584 rtx delta = GEN_INT (disp);
3585 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3587 emit_move_insn (tmpreg,
3588 gen_rtx_PLUS (Pmode, basereg,
3589 gen_rtx_HIGH (Pmode, delta)));
3590 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3591 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3594 if (DO_FRAME_NOTES && note)
3595 RTX_FRAME_RELATED_P (insn) = 1;
3599 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3604 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3605 be consistent with the rounding and size calculation done here.
3606 Change them at the same time. */
3608 /* We do our own stack alignment. First, round the size of the
3609 stack locals up to a word boundary. */
3610 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3612 /* Space for previous frame pointer + filler. If any frame is
3613 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3614 waste some space here for the sake of HP compatibility. The
3615 first slot is only used when the frame pointer is needed. */
3616 if (size || frame_pointer_needed)
3617 size += STARTING_FRAME_OFFSET;
3619 /* If the current function calls __builtin_eh_return, then we need
3620 to allocate stack space for registers that will hold data for
3621 the exception handler. */
3622 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3626 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3628 size += i * UNITS_PER_WORD;
3631 /* Account for space used by the callee general register saves. */
3632 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3633 if (df_regs_ever_live_p (i))
3634 size += UNITS_PER_WORD;
3636 /* Account for space used by the callee floating point register saves. */
3637 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3638 if (df_regs_ever_live_p (i)
3639 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3643 /* We always save both halves of the FP register, so always
3644 increment the frame size by 8 bytes. */
3648 /* If any of the floating registers are saved, account for the
3649 alignment needed for the floating point register save block. */
3652 size = (size + 7) & ~7;
3657 /* The various ABIs include space for the outgoing parameters in the
3658 size of the current function's stack frame. We don't need to align
3659 for the outgoing arguments as their alignment is set by the final
3660 rounding for the frame as a whole. */
3661 size += crtl->outgoing_args_size;
3663 /* Allocate space for the fixed frame marker. This space must be
3664 allocated for any function that makes calls or allocates stack space.  */
3666 if (!current_function_is_leaf || size)
3667 size += TARGET_64BIT ? 48 : 32;
3669 /* Finally, round to the preferred stack boundary. */
3670 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3671 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3674 /* Generate the assembly code for function entry. FILE is a stdio
3675 stream to output the code to. SIZE is an int: how many units of
3676 temporary storage to allocate.
3678 Refer to the array `regs_ever_live' to determine which registers to
3679 save; `regs_ever_live[I]' is nonzero if register number I is ever
3680 used in the function. This function is responsible for knowing
3681 which registers should not be saved even if used. */
3683 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3684 of memory. If any fpu reg is used in the function, we allocate
3685 such a block here, at the bottom of the frame, just in case it's needed.
3687 If this function is a leaf procedure, then we may choose not
3688 to do a "save" insn. The decision about whether or not
3689 to do this is made in regclass.c. */
3692 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3694 /* The function's label and associated .PROC must never be
3695 separated and must be output *after* any profiling declarations
3696 to avoid changing spaces/subspaces within a procedure. */
3697 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3698 fputs ("\t.PROC\n", file);
3700 /* hppa_expand_prologue does the dirty work now. We just need
3701 to output the assembler directives which denote the start of a function.  */
3703 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3704 if (current_function_is_leaf)
3705 fputs (",NO_CALLS", file);
3707 fputs (",CALLS", file);
3709 fputs (",SAVE_RP", file);
3711 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3712 at the beginning of the frame and that it is used as the frame
3713 pointer for the frame. We do this because our current frame
3714 layout doesn't conform to that specified in the HP runtime
3715 documentation and we need a way to indicate to programs such as
3716 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3717 isn't used by HP compilers but is supported by the assembler.
3718 However, SAVE_SP is supposed to indicate that the previous stack
3719 pointer has been saved in the frame marker. */
3720 if (frame_pointer_needed)
3721 fputs (",SAVE_SP", file);
3723 /* Pass on information about the number of callee register saves
3724 performed in the prologue.
3726 The compiler is supposed to pass the highest register number
3727 saved, the assembler then has to adjust that number before
3728 entering it into the unwind descriptor (to account for any
3729 caller saved registers with lower register numbers than the
3730 first callee saved register). */
3732 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3735 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3737 fputs ("\n\t.ENTRY\n", file);
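/* A non-leaf function with a 128 byte frame that saves RP, three
   callee-saved general registers and one floating-point register pair
   would therefore get directives along the lines of
   .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=5,ENTRY_FR=12 followed
   by .ENTRY, since ENTRY_GR is gr_saved + 2 and ENTRY_FR is
   fr_saved + 11 as emitted above.  */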
3739 remove_useless_addtr_insns (0);
3743 hppa_expand_prologue (void)
3745 int merge_sp_adjust_with_store = 0;
3746 HOST_WIDE_INT size = get_frame_size ();
3747 HOST_WIDE_INT offset;
3755 /* Compute total size for frame pointer, filler, locals and rounding to
3756 the next word boundary. Similar code appears in compute_frame_size
3757 and must be changed in tandem with this code. */
3758 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3759 if (local_fsize || frame_pointer_needed)
3760 local_fsize += STARTING_FRAME_OFFSET;
3762 actual_fsize = compute_frame_size (size, &save_fregs);
3763 if (flag_stack_usage)
3764 current_function_static_stack_size = actual_fsize;
3766 /* Compute a few things we will use often. */
3767 tmpreg = gen_rtx_REG (word_mode, 1);
3769 /* Save RP first. The calling conventions manual states RP will
3770 always be stored into the caller's frame at sp - 20 or sp - 16
3771 depending on which ABI is in use. */
3772 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3774 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3780 /* Allocate the local frame and set up the frame pointer if needed. */
3781 if (actual_fsize != 0)
3783 if (frame_pointer_needed)
3785 /* Copy the old frame pointer temporarily into %r1. Set up the
3786 new stack pointer, then store away the saved old frame pointer
3787 into the stack at sp and at the same time update the stack
3788 pointer by actual_fsize bytes. Two versions, first
3789 handles small (<8k) frames.  The second handles large (>=8k) frames.  */
3791 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3793 RTX_FRAME_RELATED_P (insn) = 1;
3795 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3797 RTX_FRAME_RELATED_P (insn) = 1;
3799 if (VAL_14_BITS_P (actual_fsize))
3800 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3803 /* It is incorrect to store the saved frame pointer at *sp,
3804 then increment sp (writes beyond the current stack boundary).
3806 So instead use stwm to store at *sp and post-increment the
3807 stack pointer as an atomic operation. Then increment sp to
3808 finish allocating the new frame. */
3809 HOST_WIDE_INT adjust1 = 8192 - 64;
3810 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3812 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3813 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3817 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3818 we need to store the previous stack pointer (frame pointer)
3819 into the frame marker on targets that use the HP unwind
3820 library. This allows the HP unwind library to be used to
3821 unwind GCC frames. However, we are not fully compatible
3822 with the HP library because our frame layout differs from
3823 that specified in the HP runtime specification.
3825 We don't want a frame note on this instruction as the frame
3826 marker moves during dynamic stack allocation.
3828 This instruction also serves as a blockage to prevent
3829 register spills from being scheduled before the stack
3830 pointer is raised. This is necessary as we store
3831 registers using the frame pointer as a base register,
3832 and the frame pointer is set before sp is raised. */
3833 if (TARGET_HPUX_UNWIND_LIBRARY)
3835 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3836 GEN_INT (TARGET_64BIT ? -8 : -4));
3838 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3839 hard_frame_pointer_rtx);
3842 emit_insn (gen_blockage ());
3844 /* no frame pointer needed. */
3847 /* In some cases we can perform the first callee register save
3848 and allocating the stack frame at the same time. If so, just
3849 make a note of it and defer allocating the frame until saving
3850 the callee registers. */
3851 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3852 merge_sp_adjust_with_store = 1;
3853 /* Can not optimize.  Adjust the stack frame by actual_fsize bytes.  */
3856 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3861 /* Normal register save.
3863 Do not save the frame pointer in the frame_pointer_needed case. It
3864 was done earlier. */
3865 if (frame_pointer_needed)
3867 offset = local_fsize;
3869 /* Saving the EH return data registers in the frame is the simplest
3870 way to get the frame unwind information emitted. We put them
3871 just before the general registers. */
3872 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3874 unsigned int i, regno;
3878 regno = EH_RETURN_DATA_REGNO (i);
3879 if (regno == INVALID_REGNUM)
3882 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3883 offset += UNITS_PER_WORD;
3887 for (i = 18; i >= 4; i--)
3888 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3890 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3891 offset += UNITS_PER_WORD;
3894 /* Account for %r3 which is saved in a special place. */
3897 /* No frame pointer needed. */
3900 offset = local_fsize - actual_fsize;
3902 /* Saving the EH return data registers in the frame is the simplest
3903 way to get the frame unwind information emitted. */
3904 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3906 unsigned int i, regno;
3910 regno = EH_RETURN_DATA_REGNO (i);
3911 if (regno == INVALID_REGNUM)
3914 /* If merge_sp_adjust_with_store is nonzero, then we can
3915 optimize the first save. */
3916 if (merge_sp_adjust_with_store)
3918 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3919 merge_sp_adjust_with_store = 0;
3922 store_reg (regno, offset, STACK_POINTER_REGNUM);
3923 offset += UNITS_PER_WORD;
3927 for (i = 18; i >= 3; i--)
3928 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3930 /* If merge_sp_adjust_with_store is nonzero, then we can
3931 optimize the first GR save. */
3932 if (merge_sp_adjust_with_store)
3934 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3935 merge_sp_adjust_with_store = 0;
3938 store_reg (i, offset, STACK_POINTER_REGNUM);
3939 offset += UNITS_PER_WORD;
3943 /* If we wanted to merge the SP adjustment with a GR save, but we never
3944 did any GR saves, then just emit the adjustment here. */
3945 if (merge_sp_adjust_with_store)
3946 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3950 /* The hppa calling conventions say that %r19, the pic offset
3951 register, is saved at sp - 32 (in this function's frame)
3952 when generating PIC code. FIXME: What is the correct thing
3953 to do for functions which make no calls and allocate no
3954 frame? Do we need to allocate a frame, or can we just omit
3955 the save? For now we'll just omit the save.
3957 We don't want a note on this insn as the frame marker can
3958 move if there is a dynamic stack allocation. */
3959 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3961 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3963 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3967 /* Align pointer properly (doubleword boundary). */
3968 offset = (offset + 7) & ~7;
3970 /* Floating point register store. */
3975 /* First get the frame or stack pointer to the start of the FP register
3977 if (frame_pointer_needed)
3979 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
3980 base = hard_frame_pointer_rtx;
3984 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3985 base = stack_pointer_rtx;
3988 /* Now actually save the FP registers. */
3989 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3991 if (df_regs_ever_live_p (i)
3992 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3994 rtx addr, insn, reg;
3995 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3996 reg = gen_rtx_REG (DFmode, i);
3997 insn = emit_move_insn (addr, reg);
4000 RTX_FRAME_RELATED_P (insn) = 1;
4003 rtx mem = gen_rtx_MEM (DFmode,
4004 plus_constant (base, offset));
4005 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4006 gen_rtx_SET (VOIDmode, mem, reg));
4010 rtx meml = gen_rtx_MEM (SFmode,
4011 plus_constant (base, offset));
4012 rtx memr = gen_rtx_MEM (SFmode,
4013 plus_constant (base, offset + 4));
4014 rtx regl = gen_rtx_REG (SFmode, i);
4015 rtx regr = gen_rtx_REG (SFmode, i + 1);
4016 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4017 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4020 RTX_FRAME_RELATED_P (setl) = 1;
4021 RTX_FRAME_RELATED_P (setr) = 1;
4022 vec = gen_rtvec (2, setl, setr);
4023 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4024 gen_rtx_SEQUENCE (VOIDmode, vec));
4027 offset += GET_MODE_SIZE (DFmode);
4034 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4035 Handle case where DISP > 8k by using the add_high_const patterns. */
4038 load_reg (int reg, HOST_WIDE_INT disp, int base)
4040 rtx dest = gen_rtx_REG (word_mode, reg);
4041 rtx basereg = gen_rtx_REG (Pmode, base);
4044 if (VAL_14_BITS_P (disp))
4045 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4046 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4048 rtx delta = GEN_INT (disp);
4049 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4051 emit_move_insn (tmpreg, delta);
4052 if (TARGET_DISABLE_INDEXING)
4054 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4055 src = gen_rtx_MEM (word_mode, tmpreg);
4058 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4062 rtx delta = GEN_INT (disp);
4063 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4064 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4066 emit_move_insn (tmpreg, high);
4067 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4070 emit_move_insn (dest, src);
4073 /* Update the total code bytes output to the text section. */
4076 update_total_code_bytes (unsigned int nbytes)
4078 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4079 && !IN_NAMED_SECTION_P (cfun->decl))
4081 unsigned int old_total = total_code_bytes;
4083 total_code_bytes += nbytes;
4085 /* Be prepared to handle overflows. */
4086 if (old_total > total_code_bytes)
4087 total_code_bytes = UINT_MAX;
4091 /* This function generates the assembly code for function exit.
4092 Args are as for output_function_prologue ().
4094 The function epilogue should not depend on the current stack
4095 pointer! It should use the frame pointer only. This is mandatory
4096 because of alloca; we also take advantage of it to omit stack
4097 adjustments before returning. */
4100 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4102 rtx insn = get_last_insn ();
4106 /* hppa_expand_epilogue does the dirty work now. We just need
4107 to output the assembler directives which denote the end of a function.
4110 To make debuggers happy, emit a nop if the epilogue was completely
4111 eliminated due to a volatile call as the last insn in the
4112 current function. That way the return address (in %r2) will
4113 always point to a valid instruction in the current function. */
4115 /* Get the last real insn. */
4116 if (GET_CODE (insn) == NOTE)
4117 insn = prev_real_insn (insn);
4119 /* If it is a sequence, then look inside. */
4120 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4121 insn = XVECEXP (PATTERN (insn), 0, 0);
4123 /* If insn is a CALL_INSN, then it must be a call to a volatile
4124 function (otherwise there would be epilogue insns). */
4125 if (insn && GET_CODE (insn) == CALL_INSN)
4127 fputs ("\tnop\n", file);
4131 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4133 if (TARGET_SOM && TARGET_GAS)
4135 /* We're done with this subspace except possibly for some additional
4136 debug information. Forget that we are in this subspace to ensure
4137 that the next function is output in its own subspace. */
4139 cfun->machine->in_nsubspa = 2;
4142 if (INSN_ADDRESSES_SET_P ())
4144 insn = get_last_nonnote_insn ();
4145 last_address += INSN_ADDRESSES (INSN_UID (insn));
4147 last_address += insn_default_length (insn);
4148 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4149 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4152 last_address = UINT_MAX;
4154 /* Finally, update the total number of code bytes output so far. */
4155 update_total_code_bytes (last_address);
4159 hppa_expand_epilogue (void)
4162 HOST_WIDE_INT offset;
4163 HOST_WIDE_INT ret_off = 0;
4165 int merge_sp_adjust_with_load = 0;
4167 /* We will use this often. */
4168 tmpreg = gen_rtx_REG (word_mode, 1);
4170 /* Try to restore RP early to avoid load/use interlocks when
4171 RP gets used in the return (bv) instruction. This appears to still
4172 be necessary even when we schedule the prologue and epilogue. */
4175 ret_off = TARGET_64BIT ? -16 : -20;
4176 if (frame_pointer_needed)
4178 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4183 /* No frame pointer, and stack is smaller than 8k. */
4184 if (VAL_14_BITS_P (ret_off - actual_fsize))
4186 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4192 /* General register restores. */
4193 if (frame_pointer_needed)
4195 offset = local_fsize;
4197 /* If the current function calls __builtin_eh_return, then we need
4198 to restore the saved EH data registers. */
4199 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4201 unsigned int i, regno;
4205 regno = EH_RETURN_DATA_REGNO (i);
4206 if (regno == INVALID_REGNUM)
4209 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4210 offset += UNITS_PER_WORD;
4214 for (i = 18; i >= 4; i--)
4215 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4217 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4218 offset += UNITS_PER_WORD;
4223 offset = local_fsize - actual_fsize;
4225 /* If the current function calls __builtin_eh_return, then we need
4226 to restore the saved EH data registers. */
4227 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4229 unsigned int i, regno;
4233 regno = EH_RETURN_DATA_REGNO (i);
4234 if (regno == INVALID_REGNUM)
4237 /* Only for the first load.
4238 merge_sp_adjust_with_load holds the register load
4239 with which we will merge the sp adjustment. */
4240 if (merge_sp_adjust_with_load == 0
4242 && VAL_14_BITS_P (-actual_fsize))
4243 merge_sp_adjust_with_load = regno;
4245 load_reg (regno, offset, STACK_POINTER_REGNUM);
4246 offset += UNITS_PER_WORD;
4250 for (i = 18; i >= 3; i--)
4252 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4254 /* Only for the first load.
4255 merge_sp_adjust_with_load holds the register load
4256 with which we will merge the sp adjustment. */
4257 if (merge_sp_adjust_with_load == 0
4259 && VAL_14_BITS_P (-actual_fsize))
4260 merge_sp_adjust_with_load = i;
4262 load_reg (i, offset, STACK_POINTER_REGNUM);
4263 offset += UNITS_PER_WORD;
4268 /* Align pointer properly (doubleword boundary). */
4269 offset = (offset + 7) & ~7;
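/* Added note: the expression above rounds OFFSET up to the next multiple
   of 8 (a doubleword).  For example, an offset of 36 becomes
   (36 + 7) & ~7 = 40, while an offset of 40 is left unchanged.  */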
4271 /* FP register restores. */
4274 /* Adjust the register to index off of. */
4275 if (frame_pointer_needed)
4276 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4278 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4280 /* Actually do the restores now. */
4281 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4282 if (df_regs_ever_live_p (i)
4283 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4285 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4286 rtx dest = gen_rtx_REG (DFmode, i);
4287 emit_move_insn (dest, src);
4291 /* Emit a blockage insn here to keep these insns from being moved to
4292 an earlier spot in the epilogue, or into the main instruction stream.
4294 This is necessary as we must not cut the stack back before all the
4295 restores are finished. */
4296 emit_insn (gen_blockage ());
4298 /* Reset stack pointer (and possibly frame pointer). The stack
4299 pointer is initially set to fp + 64 to avoid a race condition. */
4300 if (frame_pointer_needed)
4302 rtx delta = GEN_INT (-64);
4304 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4305 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4306 stack_pointer_rtx, delta));
4308 /* If we were deferring a callee register restore, do it now. */
4309 else if (merge_sp_adjust_with_load)
4311 rtx delta = GEN_INT (-actual_fsize);
4312 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4314 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4316 else if (actual_fsize != 0)
4317 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4320 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4321 frame greater than 8k), do so now. */
4323 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4325 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4327 rtx sa = EH_RETURN_STACKADJ_RTX;
4329 emit_insn (gen_blockage ());
4330 emit_insn (TARGET_64BIT
4331 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4332 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4337 hppa_pic_save_rtx (void)
4339 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4342 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4343 #define NO_DEFERRED_PROFILE_COUNTERS 0
4347 /* Vector of funcdef numbers. */
4348 static VEC(int,heap) *funcdef_nos;
4350 /* Output deferred profile counters. */
4352 output_deferred_profile_counters (void)
4357 if (VEC_empty (int, funcdef_nos))
4360 switch_to_section (data_section);
4361 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4362 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4364 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4366 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4367 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4370 VEC_free (int, heap, funcdef_nos);
4374 hppa_profile_hook (int label_no)
4376 /* We use SImode for the address of the function in both 32 and
4377 64-bit code to avoid having to provide DImode versions of the
4378 lcla2 and load_offset_label_address insn patterns. */
4379 rtx reg = gen_reg_rtx (SImode);
4380 rtx label_rtx = gen_label_rtx ();
4381 rtx begin_label_rtx, call_insn;
4382 char begin_label_name[16];
4384 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4386 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4389 emit_move_insn (arg_pointer_rtx,
4390 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4393 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4395 /* The address of the function is loaded into %r25 with an instruction-
4396 relative sequence that avoids the use of relocations. The sequence
4397 is split so that the load_offset_label_address instruction can
4398 occupy the delay slot of the call to _mcount. */
4400 emit_insn (gen_lcla2 (reg, label_rtx));
4402 emit_insn (gen_lcla1 (reg, label_rtx));
4404 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4405 reg, begin_label_rtx, label_rtx));
4407 #if !NO_DEFERRED_PROFILE_COUNTERS
4409 rtx count_label_rtx, addr, r24;
4410 char count_label_name[16];
4412 VEC_safe_push (int, heap, funcdef_nos, label_no);
4413 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4414 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4416 addr = force_reg (Pmode, count_label_rtx);
4417 r24 = gen_rtx_REG (Pmode, 24);
4418 emit_move_insn (r24, addr);
4421 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4422 gen_rtx_SYMBOL_REF (Pmode,
4424 GEN_INT (TARGET_64BIT ? 24 : 12)));
4426 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4431 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4432 gen_rtx_SYMBOL_REF (Pmode,
4434 GEN_INT (TARGET_64BIT ? 16 : 8)));
4438 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4439 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4441 /* Indicate the _mcount call cannot throw, nor will it execute a
4443 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4446 /* Fetch the return address for the frame COUNT steps up from
4447 the current frame, after the prologue. FRAMEADDR is the
4448 frame pointer of the COUNT frame.
4450 We want to ignore any export stub remnants here. To handle this,
4451 we examine the code at the return address, and if it is an export
4452 stub, we return a memory rtx for the stub return address stored
4455 The value returned is used in two different ways:
4457 1. To find a function's caller.
4459 2. To change the return address for a function.
4461 This function handles most instances of case 1; however, it will
4462 fail if there are two levels of stubs to execute on the return
4463 path. The only way I believe that can happen is if the return value
4464 needs a parameter relocation, which never happens for C code.
4466 This function handles most instances of case 2; however, it will
4467 fail if we did not originally have stub code on the return path
4468 but will need stub code on the new return path. This can happen if
4469 the caller & callee are both in the main program, but the new
4470 return location is in a shared library. */
4473 return_addr_rtx (int count, rtx frameaddr)
4480 /* Instruction stream at the normal return address for the export stub:
4482 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4483 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4484 0x00011820 | stub+16: mtsp r1,sr0
4485 0xe0400002 | stub+20: be,n 0(sr0,rp)
4487 0xe0400002 must be specified as -532676606 so that it won't be
4488 rejected as an invalid immediate operand on 64-bit hosts. */
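/* Added worked example: 0xe0400002 equals 3762290690, which does not fit
   in a signed 32-bit int.  Interpreted as a signed 32-bit value it is
   3762290690 - 2^32 = -532676606, which is how the last entry of the
   table below is written.  */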
4490 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4496 rp = get_hard_reg_initial_val (Pmode, 2);
4498 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4501 /* If there is no export stub then just use the value saved from
4502 the return pointer register. */
4504 saved_rp = gen_reg_rtx (Pmode);
4505 emit_move_insn (saved_rp, rp);
4507 /* Get pointer to the instruction stream. We have to mask out the
4508 privilege level from the two low order bits of the return address
4509 pointer here so that ins will point to the start of the first
4510 instruction that would have been executed if we returned. */
4511 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4512 label = gen_label_rtx ();
4514 /* Check the instruction stream at the normal return address for the
4515 export stub. If it is an export stub, then our return address is
4516 really in -24[frameaddr]. */
4518 for (i = 0; i < 3; i++)
4520 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4521 rtx op1 = GEN_INT (insns[i]);
4522 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4525 /* Here we know that our return address points to an export
4526 stub. We don't want to return the address of the export stub,
4527 but rather the return address of the export stub. That return
4528 address is stored at -24[frameaddr]. */
4530 emit_move_insn (saved_rp,
4532 memory_address (Pmode,
4533 plus_constant (frameaddr,
4542 emit_bcond_fp (rtx operands[])
4544 enum rtx_code code = GET_CODE (operands[0]);
4545 rtx operand0 = operands[1];
4546 rtx operand1 = operands[2];
4547 rtx label = operands[3];
4549 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4550 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4552 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4553 gen_rtx_IF_THEN_ELSE (VOIDmode,
4556 gen_rtx_REG (CCFPmode, 0),
4558 gen_rtx_LABEL_REF (VOIDmode, label),
4563 /* Adjust the cost of a scheduling dependency. Return the new cost of
4564 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4567 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4569 enum attr_type attr_type;
4571 /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4572 true dependencies, as they are described with bypasses now. */
4573 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4576 if (! recog_memoized (insn))
4579 attr_type = get_attr_type (insn);
4581 switch (REG_NOTE_KIND (link))
4584 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4587 if (attr_type == TYPE_FPLOAD)
4589 rtx pat = PATTERN (insn);
4590 rtx dep_pat = PATTERN (dep_insn);
4591 if (GET_CODE (pat) == PARALLEL)
4593 /* This happens for the fldXs,mb patterns. */
4594 pat = XVECEXP (pat, 0, 0);
4596 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4597 /* If this happens, we have to extend this to schedule
4598 optimally. Return 0 for now. */
4601 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4603 if (! recog_memoized (dep_insn))
4605 switch (get_attr_type (dep_insn))
4612 case TYPE_FPSQRTSGL:
4613 case TYPE_FPSQRTDBL:
4614 /* A fpload can't be issued until one cycle before a
4615 preceding arithmetic operation has finished if
4616 the target of the fpload is any of the sources
4617 (or destination) of the arithmetic operation. */
4618 return insn_default_latency (dep_insn) - 1;
4625 else if (attr_type == TYPE_FPALU)
4627 rtx pat = PATTERN (insn);
4628 rtx dep_pat = PATTERN (dep_insn);
4629 if (GET_CODE (pat) == PARALLEL)
4631 /* This happens for the fldXs,mb patterns. */
4632 pat = XVECEXP (pat, 0, 0);
4634 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4635 /* If this happens, we have to extend this to schedule
4636 optimally. Return 0 for now. */
4639 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4641 if (! recog_memoized (dep_insn))
4643 switch (get_attr_type (dep_insn))
4647 case TYPE_FPSQRTSGL:
4648 case TYPE_FPSQRTDBL:
4649 /* An ALU flop can't be issued until two cycles before a
4650 preceding divide or sqrt operation has finished if
4651 the target of the ALU flop is any of the sources
4652 (or destination) of the divide or sqrt operation. */
4653 return insn_default_latency (dep_insn) - 2;
4661 /* For other anti dependencies, the cost is 0. */
4664 case REG_DEP_OUTPUT:
4665 /* Output dependency; DEP_INSN writes a register that INSN writes some
4667 if (attr_type == TYPE_FPLOAD)
4669 rtx pat = PATTERN (insn);
4670 rtx dep_pat = PATTERN (dep_insn);
4671 if (GET_CODE (pat) == PARALLEL)
4673 /* This happens for the fldXs,mb patterns. */
4674 pat = XVECEXP (pat, 0, 0);
4676 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4677 /* If this happens, we have to extend this to schedule
4678 optimally. Return 0 for now. */
4681 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4683 if (! recog_memoized (dep_insn))
4685 switch (get_attr_type (dep_insn))
4692 case TYPE_FPSQRTSGL:
4693 case TYPE_FPSQRTDBL:
4694 /* A fpload can't be issued until one cycle before a
4695 preceding arithmetic operation has finished if
4696 the target of the fpload is the destination of the
4697 arithmetic operation.
4699 Exception: For PA7100LC, PA7200 and PA7300, the cost
4700 is 3 cycles, unless they bundle together. We also
4701 pay the penalty if the second insn is a fpload. */
4702 return insn_default_latency (dep_insn) - 1;
4709 else if (attr_type == TYPE_FPALU)
4711 rtx pat = PATTERN (insn);
4712 rtx dep_pat = PATTERN (dep_insn);
4713 if (GET_CODE (pat) == PARALLEL)
4715 /* This happens for the fldXs,mb patterns. */
4716 pat = XVECEXP (pat, 0, 0);
4718 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4719 /* If this happens, we have to extend this to schedule
4720 optimally. Return 0 for now. */
4723 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4725 if (! recog_memoized (dep_insn))
4727 switch (get_attr_type (dep_insn))
4731 case TYPE_FPSQRTSGL:
4732 case TYPE_FPSQRTDBL:
4733 /* An ALU flop can't be issued until two cycles before a
4734 preceding divide or sqrt operation has finished if
4735 the target of the ALU flop is also the target of
4736 the divide or sqrt operation. */
4737 return insn_default_latency (dep_insn) - 2;
4745 /* For other output dependencies, the cost is 0. */
4753 /* Adjust scheduling priorities. We use this to try to keep addil
4754 and the next use of %r1 close together. */
4756 pa_adjust_priority (rtx insn, int priority)
4758 rtx set = single_set (insn);
4762 src = SET_SRC (set);
4763 dest = SET_DEST (set);
4764 if (GET_CODE (src) == LO_SUM
4765 && symbolic_operand (XEXP (src, 1), VOIDmode)
4766 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4769 else if (GET_CODE (src) == MEM
4770 && GET_CODE (XEXP (src, 0)) == LO_SUM
4771 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4772 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4775 else if (GET_CODE (dest) == MEM
4776 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4777 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4778 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4784 /* The 700 can only issue a single insn at a time.
4785 The 7XXX processors can issue two insns at a time.
4786 The 8000 can issue 4 insns at a time. */
4788 pa_issue_rate (void)
4792 case PROCESSOR_700: return 1;
4793 case PROCESSOR_7100: return 2;
4794 case PROCESSOR_7100LC: return 2;
4795 case PROCESSOR_7200: return 2;
4796 case PROCESSOR_7300: return 2;
4797 case PROCESSOR_8000: return 4;
4806 /* Return any length adjustment needed by INSN which already has its length
4807 computed as LENGTH. Return zero if no adjustment is necessary.
4809 For the PA: function calls, millicode calls, and backwards short
4810 conditional branches with unfilled delay slots need an adjustment by +1
4811 (to account for the NOP which will be inserted into the instruction stream).
4813 Also compute the length of an inline block move here as it is too
4814 complicated to express as a length attribute in pa.md. */
4816 pa_adjust_insn_length (rtx insn, int length)
4818 rtx pat = PATTERN (insn);
4820 /* Jumps inside switch tables which have unfilled delay slots need
4822 if (GET_CODE (insn) == JUMP_INSN
4823 && GET_CODE (pat) == PARALLEL
4824 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4826 /* Millicode insn with an unfilled delay slot. */
4827 else if (GET_CODE (insn) == INSN
4828 && GET_CODE (pat) != SEQUENCE
4829 && GET_CODE (pat) != USE
4830 && GET_CODE (pat) != CLOBBER
4831 && get_attr_type (insn) == TYPE_MILLI)
4833 /* Block move pattern. */
4834 else if (GET_CODE (insn) == INSN
4835 && GET_CODE (pat) == PARALLEL
4836 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4837 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4838 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4839 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4840 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4841 return compute_movmem_length (insn) - 4;
4842 /* Block clear pattern. */
4843 else if (GET_CODE (insn) == INSN
4844 && GET_CODE (pat) == PARALLEL
4845 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4846 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4847 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4848 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4849 return compute_clrmem_length (insn) - 4;
4850 /* Conditional branch with an unfilled delay slot. */
4851 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4853 /* Adjust a short backwards conditional with an unfilled delay slot. */
4854 if (GET_CODE (pat) == SET
4856 && JUMP_LABEL (insn) != NULL_RTX
4857 && ! forward_branch_p (insn))
4859 else if (GET_CODE (pat) == PARALLEL
4860 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4863 /* Adjust dbra insn with short backwards conditional branch with
4864 unfilled delay slot -- only for the case where the counter is in a
4865 general register. */
4866 else if (GET_CODE (pat) == PARALLEL
4867 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4868 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4869 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4871 && ! forward_branch_p (insn))
4879 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4882 pa_print_operand_punct_valid_p (unsigned char code)
4893 /* Print operand X (an rtx) in assembler syntax to file FILE.
4894 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4895 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4898 print_operand (FILE *file, rtx x, int code)
4903 /* Output a 'nop' if there's nothing for the delay slot. */
4904 if (dbr_sequence_length () == 0)
4905 fputs ("\n\tnop", file);
4908 /* Output a nullification completer if there's nothing for the
4909 delay slot or nullification is requested. */
4910 if (dbr_sequence_length () == 0 ||
4912 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4916 /* Print out the second register name of a register pair.
4917 I.e., R (6) => 7. */
4918 fputs (reg_names[REGNO (x) + 1], file);
4921 /* A register or zero. */
4923 || (x == CONST0_RTX (DFmode))
4924 || (x == CONST0_RTX (SFmode)))
4926 fputs ("%r0", file);
4932 /* A register or zero (floating point). */
4934 || (x == CONST0_RTX (DFmode))
4935 || (x == CONST0_RTX (SFmode)))
4937 fputs ("%fr0", file);
4946 xoperands[0] = XEXP (XEXP (x, 0), 0);
4947 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4948 output_global_address (file, xoperands[1], 0);
4949 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4953 case 'C': /* Plain (C)ondition */
4955 switch (GET_CODE (x))
4958 fputs ("=", file); break;
4960 fputs ("<>", file); break;
4962 fputs (">", file); break;
4964 fputs (">=", file); break;
4966 fputs (">>=", file); break;
4968 fputs (">>", file); break;
4970 fputs ("<", file); break;
4972 fputs ("<=", file); break;
4974 fputs ("<<=", file); break;
4976 fputs ("<<", file); break;
4981 case 'N': /* Condition, (N)egated */
4982 switch (GET_CODE (x))
4985 fputs ("<>", file); break;
4987 fputs ("=", file); break;
4989 fputs ("<=", file); break;
4991 fputs ("<", file); break;
4993 fputs ("<<", file); break;
4995 fputs ("<<=", file); break;
4997 fputs (">=", file); break;
4999 fputs (">", file); break;
5001 fputs (">>", file); break;
5003 fputs (">>=", file); break;
5008 /* For floating point comparisons. Note that the output
5009 predicates are the complement of the desired mode. The
5010 conditions for GT, GE, LT, LE and LTGT cause an invalid
5011 operation exception if the result is unordered and this
5012 exception is enabled in the floating-point status register. */
5014 switch (GET_CODE (x))
5017 fputs ("!=", file); break;
5019 fputs ("=", file); break;
5021 fputs ("!>", file); break;
5023 fputs ("!>=", file); break;
5025 fputs ("!<", file); break;
5027 fputs ("!<=", file); break;
5029 fputs ("!<>", file); break;
5031 fputs ("!?<=", file); break;
5033 fputs ("!?<", file); break;
5035 fputs ("!?>=", file); break;
5037 fputs ("!?>", file); break;
5039 fputs ("!?=", file); break;
5041 fputs ("!?", file); break;
5043 fputs ("?", file); break;
5048 case 'S': /* Condition, operands are (S)wapped. */
5049 switch (GET_CODE (x))
5052 fputs ("=", file); break;
5054 fputs ("<>", file); break;
5056 fputs ("<", file); break;
5058 fputs ("<=", file); break;
5060 fputs ("<<=", file); break;
5062 fputs ("<<", file); break;
5064 fputs (">", file); break;
5066 fputs (">=", file); break;
5068 fputs (">>=", file); break;
5070 fputs (">>", file); break;
5075 case 'B': /* Condition, (B)oth swapped and negate. */
5076 switch (GET_CODE (x))
5079 fputs ("<>", file); break;
5081 fputs ("=", file); break;
5083 fputs (">=", file); break;
5085 fputs (">", file); break;
5087 fputs (">>", file); break;
5089 fputs (">>=", file); break;
5091 fputs ("<=", file); break;
5093 fputs ("<", file); break;
5095 fputs ("<<", file); break;
5097 fputs ("<<=", file); break;
5103 gcc_assert (GET_CODE (x) == CONST_INT);
5104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5107 gcc_assert (GET_CODE (x) == CONST_INT);
5108 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5111 gcc_assert (GET_CODE (x) == CONST_INT);
5112 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5115 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5116 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5119 gcc_assert (GET_CODE (x) == CONST_INT);
5120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5123 gcc_assert (GET_CODE (x) == CONST_INT);
5124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5127 if (GET_CODE (x) == CONST_INT)
5132 switch (GET_CODE (XEXP (x, 0)))
5136 if (ASSEMBLER_DIALECT == 0)
5137 fputs ("s,mb", file);
5139 fputs (",mb", file);
5143 if (ASSEMBLER_DIALECT == 0)
5144 fputs ("s,ma", file);
5146 fputs (",ma", file);
5149 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5150 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5152 if (ASSEMBLER_DIALECT == 0)
5155 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5156 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5158 if (ASSEMBLER_DIALECT == 0)
5159 fputs ("x,s", file);
5163 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5167 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5173 output_global_address (file, x, 0);
5176 output_global_address (file, x, 1);
5178 case 0: /* Don't do anything special */
5183 compute_zdepwi_operands (INTVAL (x), op);
5184 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5190 compute_zdepdi_operands (INTVAL (x), op);
5191 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5195 /* We can get here from a .vtable_inherit due to our
5196 CONSTANT_ADDRESS_P rejecting perfectly good constant
5202 if (GET_CODE (x) == REG)
5204 fputs (reg_names [REGNO (x)], file);
5205 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5211 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5212 && (REGNO (x) & 1) == 0)
5215 else if (GET_CODE (x) == MEM)
5217 int size = GET_MODE_SIZE (GET_MODE (x));
5218 rtx base = NULL_RTX;
5219 switch (GET_CODE (XEXP (x, 0)))
5223 base = XEXP (XEXP (x, 0), 0);
5224 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5228 base = XEXP (XEXP (x, 0), 0);
5229 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5232 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5233 fprintf (file, "%s(%s)",
5234 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5235 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5236 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5237 fprintf (file, "%s(%s)",
5238 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5239 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5240 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5241 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5243 /* Because the REG_POINTER flag can get lost during reload,
5244 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5245 index and base registers in the combined move patterns. */
5246 rtx base = XEXP (XEXP (x, 0), 1);
5247 rtx index = XEXP (XEXP (x, 0), 0);
5249 fprintf (file, "%s(%s)",
5250 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5253 output_address (XEXP (x, 0));
5256 output_address (XEXP (x, 0));
5261 output_addr_const (file, x);
5264 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5267 output_global_address (FILE *file, rtx x, int round_constant)
5270 /* Imagine (high (const (plus ...))). */
5271 if (GET_CODE (x) == HIGH)
5274 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5275 output_addr_const (file, x);
5276 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5278 output_addr_const (file, x);
5279 fputs ("-$global$", file);
5281 else if (GET_CODE (x) == CONST)
5283 const char *sep = "";
5284 int offset = 0; /* assembler wants -$global$ at end */
5285 rtx base = NULL_RTX;
5287 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5290 base = XEXP (XEXP (x, 0), 0);
5291 output_addr_const (file, base);
5294 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5300 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5303 base = XEXP (XEXP (x, 0), 1);
5304 output_addr_const (file, base);
5307 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5313 /* How bogus. The compiler is apparently responsible for
5314 rounding the constant if it uses an LR field selector.
5316 The linker and/or assembler seem a better place since
5317 they have to do this kind of thing already.
5319 If we fail to do this, HP's optimizing linker may eliminate
5320 an addil, but not update the ldw/stw/ldo instruction that
5321 uses the result of the addil. */
5323 offset = ((offset + 0x1000) & ~0x1fff);
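/* Added worked example: the statement above rounds OFFSET to the nearest
   multiple of 0x2000 (8k).  For instance, 0x2345 becomes
   (0x2345 + 0x1000) & ~0x1fff = 0x2000, and 0x3001 becomes 0x4000.  */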
5325 switch (GET_CODE (XEXP (x, 0)))
5338 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5346 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5347 fputs ("-$global$", file);
5349 fprintf (file, "%s%d", sep, offset);
5352 output_addr_const (file, x);
5355 /* Output boilerplate text to appear at the beginning of the file.
5356 There are several possible versions. */
5357 #define aputs(x) fputs(x, asm_out_file)
5359 pa_file_start_level (void)
5362 aputs ("\t.LEVEL 2.0w\n");
5363 else if (TARGET_PA_20)
5364 aputs ("\t.LEVEL 2.0\n");
5365 else if (TARGET_PA_11)
5366 aputs ("\t.LEVEL 1.1\n");
5368 aputs ("\t.LEVEL 1.0\n");
5372 pa_file_start_space (int sortspace)
5374 aputs ("\t.SPACE $PRIVATE$");
5377 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5378 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5379 "\n\t.SPACE $TEXT$");
5382 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5383 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5387 pa_file_start_file (int want_version)
5389 if (write_symbols != NO_DEBUG)
5391 output_file_directive (asm_out_file, main_input_filename);
5393 aputs ("\t.version\t\"01.01\"\n");
5398 pa_file_start_mcount (const char *aswhat)
5401 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5405 pa_elf_file_start (void)
5407 pa_file_start_level ();
5408 pa_file_start_mcount ("ENTRY");
5409 pa_file_start_file (0);
5413 pa_som_file_start (void)
5415 pa_file_start_level ();
5416 pa_file_start_space (0);
5417 aputs ("\t.IMPORT $global$,DATA\n"
5418 "\t.IMPORT $$dyncall,MILLICODE\n");
5419 pa_file_start_mcount ("CODE");
5420 pa_file_start_file (0);
5424 pa_linux_file_start (void)
5426 pa_file_start_file (1);
5427 pa_file_start_level ();
5428 pa_file_start_mcount ("CODE");
5432 pa_hpux64_gas_file_start (void)
5434 pa_file_start_level ();
5435 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5437 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5439 pa_file_start_file (1);
5443 pa_hpux64_hpas_file_start (void)
5445 pa_file_start_level ();
5446 pa_file_start_space (1);
5447 pa_file_start_mcount ("CODE");
5448 pa_file_start_file (0);
5452 /* Search the deferred plabel list for SYMBOL and return its internal
5453 label. If an entry for SYMBOL is not found, a new entry is created. */
5456 get_deferred_plabel (rtx symbol)
5458 const char *fname = XSTR (symbol, 0);
5461 /* See if we have already put this function on the list of deferred
5462 plabels. This list is generally small, so a linear search is not
5463 too ugly. If it proves too slow, replace it with something faster. */
5464 for (i = 0; i < n_deferred_plabels; i++)
5465 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5468 /* If the deferred plabel list is empty, or this entry was not found
5469 on the list, create a new entry on the list. */
5470 if (deferred_plabels == NULL || i == n_deferred_plabels)
5474 if (deferred_plabels == 0)
5475 deferred_plabels = ggc_alloc_deferred_plabel ();
5477 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5479 n_deferred_plabels + 1);
5481 i = n_deferred_plabels++;
5482 deferred_plabels[i].internal_label = gen_label_rtx ();
5483 deferred_plabels[i].symbol = symbol;
5485 /* Gross. We have just implicitly taken the address of this
5486 function. Mark it in the same manner as assemble_name. */
5487 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5489 mark_referenced (id);
5492 return deferred_plabels[i].internal_label;
5496 output_deferred_plabels (void)
5500 /* If we have some deferred plabels, then we need to switch into the
5501 data or readonly data section, and align it to a 4 byte boundary
5502 before outputting the deferred plabels. */
5503 if (n_deferred_plabels)
5505 switch_to_section (flag_pic ? data_section : readonly_data_section);
5506 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5509 /* Now output the deferred plabels. */
5510 for (i = 0; i < n_deferred_plabels; i++)
5512 targetm.asm_out.internal_label (asm_out_file, "L",
5513 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5514 assemble_integer (deferred_plabels[i].symbol,
5515 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5519 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5520 /* Initialize optabs to point to HPUX long double emulation routines. */
5522 pa_hpux_init_libfuncs (void)
5524 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5525 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5526 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5527 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5528 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5529 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5530 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5531 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5532 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5534 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5535 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5536 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5537 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5538 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5539 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5540 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5542 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5543 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5544 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5545 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5547 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5548 ? "__U_Qfcnvfxt_quad_to_sgl"
5549 : "_U_Qfcnvfxt_quad_to_sgl");
5550 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5551 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5552 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5554 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5555 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5556 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5557 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5561 /* HP's millicode routines mean something special to the assembler.
5562 Keep track of which ones we have used. */
5564 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5565 static void import_milli (enum millicodes);
5566 static char imported[(int) end1000];
5567 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5568 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5569 #define MILLI_START 10
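/* Added note: MILLI_START is the offset of the "...." placeholder in
   IMPORT_STRING, so the strncpy in import_milli below overwrites it with
   the millicode name; e.g. import_milli (mulI) emits
   ".IMPORT $$mulI,MILLICODE".  */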
5572 import_milli (enum millicodes code)
5574 char str[sizeof (import_string)];
5576 if (!imported[(int) code])
5578 imported[(int) code] = 1;
5579 strcpy (str, import_string);
5580 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5581 output_asm_insn (str, 0);
5585 /* The register constraints have put the operands and return value in
5586 the proper registers. */
5589 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5591 import_milli (mulI);
5592 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5595 /* Emit the rtl for doing a division by a constant. */
5597 /* Do magic division millicodes exist for this value? */
5598 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5600 /* We'll use an array to keep track of the magic millicodes and
5601 whether or not we've used them already. [n][0] is signed, [n][1] is
5604 static int div_milli[16][2];
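/* Added note: a nonzero entry in magic_milli means a millicode routine
   exists for that divisor.  For example, magic_milli[3] is 1, so a
   constant division by 3 can be emitted as a call to $$divI_3 (or
   $$divU_3 when unsigned), as output_div_insn below constructs.  */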
5607 emit_hpdiv_const (rtx *operands, int unsignedp)
5609 if (GET_CODE (operands[2]) == CONST_INT
5610 && INTVAL (operands[2]) > 0
5611 && INTVAL (operands[2]) < 16
5612 && magic_milli[INTVAL (operands[2])])
5614 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5616 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5620 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5621 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5623 gen_rtx_REG (SImode, 26),
5625 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5626 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5627 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5628 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5629 gen_rtx_CLOBBER (VOIDmode, ret))));
5630 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5637 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5641 /* If the divisor is a constant, try to use one of the special
5643 if (GET_CODE (operands[0]) == CONST_INT)
5645 static char buf[100];
5646 divisor = INTVAL (operands[0]);
5647 if (!div_milli[divisor][unsignedp])
5649 div_milli[divisor][unsignedp] = 1;
5651 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5653 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5657 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5658 INTVAL (operands[0]));
5659 return output_millicode_call (insn,
5660 gen_rtx_SYMBOL_REF (SImode, buf));
5664 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5665 INTVAL (operands[0]));
5666 return output_millicode_call (insn,
5667 gen_rtx_SYMBOL_REF (SImode, buf));
5670 /* Divisor isn't a special constant. */
5675 import_milli (divU);
5676 return output_millicode_call (insn,
5677 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5681 import_milli (divI);
5682 return output_millicode_call (insn,
5683 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5688 /* Output a $$rem millicode to do mod. */
5691 output_mod_insn (int unsignedp, rtx insn)
5695 import_milli (remU);
5696 return output_millicode_call (insn,
5697 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5701 import_milli (remI);
5702 return output_millicode_call (insn,
5703 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5708 output_arg_descriptor (rtx call_insn)
5710 const char *arg_regs[4];
5711 enum machine_mode arg_mode;
5713 int i, output_flag = 0;
5716 /* We neither need nor want argument location descriptors for the
5717 64-bit runtime environment or the ELF32 environment. */
5718 if (TARGET_64BIT || TARGET_ELF32)
5721 for (i = 0; i < 4; i++)
5724 /* Specify explicitly that no argument relocations should take place
5725 if using the portable runtime calling conventions. */
5726 if (TARGET_PORTABLE_RUNTIME)
5728 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5733 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5734 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5735 link; link = XEXP (link, 1))
5737 rtx use = XEXP (link, 0);
5739 if (! (GET_CODE (use) == USE
5740 && GET_CODE (XEXP (use, 0)) == REG
5741 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5744 arg_mode = GET_MODE (XEXP (use, 0));
5745 regno = REGNO (XEXP (use, 0));
5746 if (regno >= 23 && regno <= 26)
5748 arg_regs[26 - regno] = "GR";
5749 if (arg_mode == DImode)
5750 arg_regs[25 - regno] = "GR";
5752 else if (regno >= 32 && regno <= 39)
5754 if (arg_mode == SFmode)
5755 arg_regs[(regno - 32) / 2] = "FR";
5758 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5759 arg_regs[(regno - 34) / 2] = "FR";
5760 arg_regs[(regno - 34) / 2 + 1] = "FU";
5762 arg_regs[(regno - 34) / 2] = "FU";
5763 arg_regs[(regno - 34) / 2 + 1] = "FR";
5768 fputs ("\t.CALL ", asm_out_file);
5769 for (i = 0; i < 4; i++)
5774 fputc (',', asm_out_file);
5775 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5778 fputc ('\n', asm_out_file);
5782 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5783 enum machine_mode mode, secondary_reload_info *sri)
5786 enum reg_class rclass = (enum reg_class) rclass_i;
5788 /* Handle the easy stuff first. */
5789 if (rclass == R1_REGS)
5795 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5801 /* If we have something like (mem (mem (...))), we can safely assume the
5802 inner MEM will end up in a general register after reloading, so there's
5803 no need for a secondary reload. */
5804 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5807 /* Trying to load a constant into a FP register during PIC code
5808 generation requires %r1 as a scratch register. */
5810 && (mode == SImode || mode == DImode)
5811 && FP_REG_CLASS_P (rclass)
5812 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5814 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5815 : CODE_FOR_reload_indi_r1);
5819 /* Secondary reloads of symbolic operands require %r1 as a scratch
5820 register when we're generating PIC code and when the operand isn't
5822 if (symbolic_expression_p (x))
5824 if (GET_CODE (x) == HIGH)
5827 if (flag_pic || !read_only_operand (x, VOIDmode))
5829 gcc_assert (mode == SImode || mode == DImode);
5830 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5831 : CODE_FOR_reload_indi_r1);
5836 /* Profiling showed the PA port spends about 1.3% of its compilation
5837 time in true_regnum from calls inside pa_secondary_reload_class. */
5838 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5839 regno = true_regnum (x);
5841 /* In order to allow 14-bit displacements in integer loads and stores,
5842 we need to prevent reload from generating out of range integer mode
5843 loads and stores to the floating point registers. Previously, we
5844 used to call for a secondary reload and have emit_move_sequence()
5845 fix the instruction sequence. However, reload occasionally wouldn't
5846 generate the reload and we would end up with an invalid REG+D memory
5847 address. So, now we use an intermediate general register for most
5848 memory loads and stores. */
5849 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5850 && GET_MODE_CLASS (mode) == MODE_INT
5851 && FP_REG_CLASS_P (rclass))
5853 /* Reload passes (mem:SI (reg/f:DI 30 %r30)) when it wants to check
5854 the secondary reload needed for a pseudo. It never passes a
5856 if (GET_CODE (x) == MEM)
5860 /* We don't need an intermediate for indexed and LO_SUM DLT
5861 memory addresses. When INT14_OK_STRICT is true, it might
5862 appear that we could directly allow register indirect
5863 memory addresses. However, this doesn't work because we
5864 don't support SUBREGs in floating-point register copies
5865 and reload doesn't tell us when it's going to use a SUBREG. */
5866 if (IS_INDEX_ADDR_P (x)
5867 || IS_LO_SUM_DLT_ADDR_P (x))
5870 /* Otherwise, we need an intermediate general register. */
5871 return GENERAL_REGS;
5874 /* Request a secondary reload with a general scratch register
5875 for everything else. ??? Could symbolic operands be handled
5876 directly when generating non-pic PA 2.0 code? */
5878 ? direct_optab_handler (reload_in_optab, mode)
5879 : direct_optab_handler (reload_out_optab, mode));
5883 /* We need a secondary register (GPR) for copies between the SAR
5884 and anything other than a general register. */
5885 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5888 ? direct_optab_handler (reload_in_optab, mode)
5889 : direct_optab_handler (reload_out_optab, mode));
5893 /* A SAR<->FP register copy requires a secondary register (GPR) as
5894 well as secondary memory. */
5895 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5896 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5897 && FP_REG_CLASS_P (rclass)))
5899 ? direct_optab_handler (reload_in_optab, mode)
5900 : direct_optab_handler (reload_out_optab, mode));
5905 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5906 is only marked as live on entry by df-scan when it is a fixed
5907 register. It isn't a fixed register in the 64-bit runtime,
5908 so we need to mark it here. */
5911 pa_extra_live_on_entry (bitmap regs)
5914 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5917 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5918 to prevent it from being deleted. */
5921 pa_eh_return_handler_rtx (void)
5925 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
5926 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5927 tmp = gen_rtx_MEM (word_mode, tmp);
5932 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5933 by invisible reference. As a GCC extension, we also pass anything
5934 with a zero or variable size by reference.
5936 The 64-bit runtime does not describe passing any types by invisible
5937 reference. The internals of GCC can't currently handle passing
5938 empty structures, and zero or variable length arrays when they are
5939 not passed entirely on the stack or by reference. Thus, as a GCC
5940 extension, we pass these types by reference. The HP compiler doesn't
5941 support these types, so hopefully there shouldn't be any compatibility
5942 issues. This may have to be revisited when HP releases a C99 compiler
5943 or updates the ABI. */
5946 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5947 enum machine_mode mode, const_tree type,
5948 bool named ATTRIBUTE_UNUSED)
5953 size = int_size_in_bytes (type);
5955 size = GET_MODE_SIZE (mode);
5960 return size <= 0 || size > 8;
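/* Added example: under this test a 16-byte struct (size > 8) or a
   zero-sized aggregate (size <= 0) is passed by invisible reference,
   while an 8-byte double is passed by value.  */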
5964 function_arg_padding (enum machine_mode mode, const_tree type)
5969 && (AGGREGATE_TYPE_P (type)
5970 || TREE_CODE (type) == COMPLEX_TYPE
5971 || TREE_CODE (type) == VECTOR_TYPE)))
5973 /* Return none if justification is not required. */
5975 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5976 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5979 /* The directions set here are ignored when a BLKmode argument larger
5980 than a word is placed in a register. Different code is used for
5981 the stack and registers. This makes it difficult to have a
5982 consistent data representation for both the stack and registers.
5983 For both runtimes, the justification and padding for arguments on
5984 the stack and in registers should be identical. */
5986 /* The 64-bit runtime specifies left justification for aggregates. */
5989 /* The 32-bit runtime architecture specifies right justification.
5990 When the argument is passed on the stack, the argument is padded
5991 with garbage on the left. The HP compiler pads with zeros. */
5995 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6002 /* Do what is necessary for `va_start'. We look at the current function
6003 to determine if stdargs or varargs is used and fill in an initial
6004 va_list. A pointer to this constructor is returned. */
6007 hppa_builtin_saveregs (void)
6010 tree fntype = TREE_TYPE (current_function_decl);
6011 int argadj = ((!stdarg_p (fntype))
6012 ? UNITS_PER_WORD : 0);
6015 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6017 offset = crtl->args.arg_offset_rtx;
6023 /* Adjust for varargs/stdarg differences. */
6025 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6027 offset = crtl->args.arg_offset_rtx;
6029 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6030 from the incoming arg pointer and growing to larger addresses. */
6031 for (i = 26, off = -64; i >= 19; i--, off += 8)
6032 emit_move_insn (gen_rtx_MEM (word_mode,
6033 plus_constant (arg_pointer_rtx, off)),
6034 gen_rtx_REG (word_mode, i));
6036 /* The incoming args pointer points just beyond the flushback area;
6037 normally this is not a serious concern. However, when we are doing
6038 varargs/stdargs we want to make the arg pointer point to the start
6039 of the incoming argument area. */
6040 emit_move_insn (virtual_incoming_args_rtx,
6041 plus_constant (arg_pointer_rtx, -64));
6043 /* Now return a pointer to the first anonymous argument. */
6044 return copy_to_reg (expand_binop (Pmode, add_optab,
6045 virtual_incoming_args_rtx,
6046 offset, 0, 0, OPTAB_LIB_WIDEN));
6049 /* Store general registers on the stack. */
6050 dest = gen_rtx_MEM (BLKmode,
6051 plus_constant (crtl->args.internal_arg_pointer,
6053 set_mem_alias_set (dest, get_varargs_alias_set ());
6054 set_mem_align (dest, BITS_PER_WORD);
6055 move_block_from_reg (23, dest, 4);
6057 /* move_block_from_reg will emit code to store the argument registers
6058 individually as scalar stores.
6060 However, other insns may later load from the same addresses for
6061 a structure load (passing a struct to a varargs routine).
6063 The alias code assumes that such aliasing can never happen, so we
6064 have to keep memory referencing insns from moving up beyond the
6065 last argument register store. So we emit a blockage insn here. */
6066 emit_insn (gen_blockage ());
6068 return copy_to_reg (expand_binop (Pmode, add_optab,
6069 crtl->args.internal_arg_pointer,
6070 offset, 0, 0, OPTAB_LIB_WIDEN));
6074 hppa_va_start (tree valist, rtx nextarg)
6076 nextarg = expand_builtin_saveregs ();
6077 std_expand_builtin_va_start (valist, nextarg);
6081 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6086 /* Args grow upward. We can use the generic routines. */
6087 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6089 else /* !TARGET_64BIT */
6091 tree ptr = build_pointer_type (type);
6094 unsigned int size, ofs;
6097 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6101 ptr = build_pointer_type (type);
6103 size = int_size_in_bytes (type);
6104 valist_type = TREE_TYPE (valist);
6106 /* Args grow down. Not handled by generic routines. */
6108 u = fold_convert (sizetype, size_in_bytes (type));
6109 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6110 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6112 /* Align to 4 or 8 byte boundary depending on argument size. */
6114 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6115 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6116 t = fold_convert (valist_type, t);
6118 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6120 ofs = (8 - size) % 4;
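/* Added worked example: a 1-byte argument gives ofs = (8 - 1) % 4 = 3,
   so the pointer is advanced three bytes to the right-justified value;
   a 4-byte argument gives ofs = 0 and is read at the start of its slot.  */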
6124 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6127 t = fold_convert (ptr, t);
6128 t = build_va_arg_indirect_ref (t);
6131 t = build_va_arg_indirect_ref (t);
6137 /* True if MODE is valid for the target. By "valid", we mean able to
6138 be manipulated in non-trivial ways. In particular, this means all
6139 the arithmetic is supported.
6141 Currently, TImode is not valid as the HP 64-bit runtime documentation
6142 doesn't document the alignment and calling conventions for this type.
6143 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6144 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6147 pa_scalar_mode_supported_p (enum machine_mode mode)
6149 int precision = GET_MODE_PRECISION (mode);
6151 switch (GET_MODE_CLASS (mode))
6153 case MODE_PARTIAL_INT:
6155 if (precision == CHAR_TYPE_SIZE)
6157 if (precision == SHORT_TYPE_SIZE)
6159 if (precision == INT_TYPE_SIZE)
6161 if (precision == LONG_TYPE_SIZE)
6163 if (precision == LONG_LONG_TYPE_SIZE)
6168 if (precision == FLOAT_TYPE_SIZE)
6170 if (precision == DOUBLE_TYPE_SIZE)
6172 if (precision == LONG_DOUBLE_TYPE_SIZE)
6176 case MODE_DECIMAL_FLOAT:
6184 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6185 it branches to the next real instruction. Otherwise, return FALSE. */
6188 branch_to_delay_slot_p (rtx insn)
6190 if (dbr_sequence_length ())
6193 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6196 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6198 This occurs when INSN has an unfilled delay slot and is followed
6199 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6200 the jump branches into the delay slot. So, we add a nop in the delay
6201 slot just to be safe. This messes up our instruction count, but we
6202 don't know how big the ASM_INPUT insn is anyway. */
6205 branch_needs_nop_p (rtx insn)
6209 if (dbr_sequence_length ())
6212 next_insn = next_real_insn (insn);
6213 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6216 /* This routine handles all the normal conditional branch sequences we
6217 might need to generate. It handles compare immediate vs compare
6218 register, nullification of delay slots, varying length branches,
6219 negated branches, and all combinations of the above. It returns the
6220 output appropriate to emit the branch corresponding to all given
6224 output_cbranch (rtx *operands, int negated, rtx insn)
6226 static char buf[100];
6228 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6229 int length = get_attr_length (insn);
6232 /* A conditional branch to the following instruction (e.g. the delay slot)
6233 is asking for a disaster. This can happen when not optimizing and
6234 when jump optimization fails.
6236 While it is usually safe to emit nothing, this can fail if the
6237 preceding instruction is a nullified branch with an empty delay
6238 slot and the same branch target as this branch. We could check
6239 for this but jump optimization should eliminate nop jumps. It
6240 is always safe to emit a nop. */
6241 if (branch_to_delay_slot_p (insn))
6244 /* The doubleword form of the cmpib instruction doesn't have the LEU
6245 and GTU conditions while the cmpb instruction does. Since we accept
6246 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6247 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6248 operands[2] = gen_rtx_REG (DImode, 0);
6249 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6250 operands[1] = gen_rtx_REG (DImode, 0);
6252 /* If this is a long branch with its delay slot unfilled, set `nullify'
6253 as it can nullify the delay slot and save a nop. */
6254 if (length == 8 && dbr_sequence_length () == 0)
6257 /* If this is a short forward conditional branch which did not get
6258 its delay slot filled, the delay slot can still be nullified. */
6259 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6260 nullify = forward_branch_p (insn);
6262 /* A forward branch over a single nullified insn can be done with a
6263 comclr instruction. This avoids a single cycle penalty due to
6264 mis-predicted branch if we fall through (branch not taken). */
6266 && next_real_insn (insn) != 0
6267 && get_attr_length (next_real_insn (insn)) == 4
6268 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6274 /* All short conditional branches except backwards with an unfilled
6278 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6280 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6281 if (GET_MODE (operands[1]) == DImode)
6284 strcat (buf, "%B3");
6286 strcat (buf, "%S3");
6288 strcat (buf, " %2,%r1,%%r0");
6291 if (branch_needs_nop_p (insn))
6292 strcat (buf, ",n %2,%r1,%0%#");
6294 strcat (buf, ",n %2,%r1,%0");
6297 strcat (buf, " %2,%r1,%0");
6300 /* All long conditionals. Note a short backward branch with an
6301 unfilled delay slot is treated just like a long backward branch
6302 with an unfilled delay slot. */
6304 /* Handle weird backwards branch with a filled delay slot
6305 which is nullified. */
6306 if (dbr_sequence_length () != 0
6307 && ! forward_branch_p (insn)
6310 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6311 if (GET_MODE (operands[1]) == DImode)
6314 strcat (buf, "%S3");
6316 strcat (buf, "%B3");
6317 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6319 /* Handle short backwards branch with an unfilled delay slot.
6320 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6321 taken and untaken branches. */
6322 else if (dbr_sequence_length () == 0
6323 && ! forward_branch_p (insn)
6324 && INSN_ADDRESSES_SET_P ()
6325 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6326 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6328 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6329 if (GET_MODE (operands[1]) == DImode)
6332 strcat (buf, "%B3 %2,%r1,%0%#");
6334 strcat (buf, "%S3 %2,%r1,%0%#");
6338 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6339 if (GET_MODE (operands[1]) == DImode)
6342 strcat (buf, "%S3");
6344 strcat (buf, "%B3");
6346 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6348 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6353 /* The reversed conditional branch must branch over one additional
6354 instruction if the delay slot is filled and needs to be extracted
6355 by output_lbranch. If the delay slot is empty or this is a
6356 nullified forward branch, the instruction after the reversed
6357 condition branch must be nullified. */
6358 if (dbr_sequence_length () == 0
6359 || (nullify && forward_branch_p (insn)))
6363 operands[4] = GEN_INT (length);
6368 operands[4] = GEN_INT (length + 4);
6371 /* Create a reversed conditional branch which branches around
6372 the following insns. */
6373 if (GET_MODE (operands[1]) != DImode)
6379 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6382 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6388 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6391 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6400 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6403 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6409 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6412 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6416 output_asm_insn (buf, operands);
6417 return output_lbranch (operands[0], insn, xdelay);
6422 /* This routine handles output of long unconditional branches that
6423 exceed the maximum range of a simple branch instruction. Since
6424 we don't have a register available for the branch, we save register
6425 %r1 in the frame marker, load the branch destination DEST into %r1,
6426 execute the branch, and restore %r1 in the delay slot of the branch.
6428 Since long branches may have an insn in the delay slot and the
6429 delay slot is used to restore %r1, we in general need to extract
6430 this insn and execute it before the branch. However, to facilitate
6431 use of this function by conditional branches, we also provide an
6432 option to not extract the delay insn so that it will be emitted
6433 after the long branch. So, if there is an insn in the delay slot,
6434 it is extracted if XDELAY is nonzero.
6436 The lengths of the various long-branch sequences are 20, 16 and 24
6437 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
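/* Rough illustration only, pieced together from the instruction strings
   emitted below (the exact frame-marker slot and label forms vary): the
   non-PIC sequence is approximately

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)	; very long branch
	ldw -20(%r30),%r1	; restore %r1 in the delay slot

   which is 16 bytes.  The portable runtime form uses ldil/ldo/bv in
   place of the be (20 bytes), and the PIC form materializes the
   pc-relative offset with bl .+8/addil/ldo before the bv (24 bytes).  */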
6440 output_lbranch (rtx dest, rtx insn, int xdelay)
6444 xoperands[0] = dest;
6446 /* First, free up the delay slot. */
6447 if (xdelay && dbr_sequence_length () != 0)
6449 /* We can't handle a jump in the delay slot. */
6450 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6452 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6455 /* Now delete the delay insn. */
6456 SET_INSN_DELETED (NEXT_INSN (insn));
6459 /* Output an insn to save %r1. The runtime documentation doesn't
6460 specify whether the "Clean Up" slot in the caller's frame can
6461 be clobbered by the callee. It isn't copied by HP's builtin
6462 alloca, so this suggests that it can be clobbered if necessary.
6463 The "Static Link" location is copied by HP builtin alloca, so
6464 we avoid using it. Using the cleanup slot might be a problem
6465 if we have to interoperate with languages that pass cleanup
6466 information. However, it should be possible to handle these
6467 situations with GCC's asm feature.
6469 The "Current RP" slot is reserved for the called procedure, so
6470 we try to use it when we don't have a frame of our own. It's
6471 rather unlikely that we won't have a frame when we need to emit a very long branch.
6474 Really the way to go long term is a register scavenger; go to
6475 the target of the jump and find a register which we can use
6476 as a scratch to hold the value in %r1. Then, we wouldn't have
6477 to free up the delay slot or clobber a slot that may be needed
6478 for other purposes. */
6481 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6482 /* Use the return pointer slot in the frame marker. */
6483 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6485 /* Use the slot at -40 in the frame marker since HP builtin
6486 alloca doesn't copy it. */
6487 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6491 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6492 /* Use the return pointer slot in the frame marker. */
6493 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6495 /* Use the "Clean Up" slot in the frame marker. In GCC,
6496 the only other use of this location is for copying a
6497 floating point double argument from a floating-point
6498 register to two general registers. The copy is done
6499 as an "atomic" operation when outputting a call, so it
6500 won't interfere with our using the location here. */
6501 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6504 if (TARGET_PORTABLE_RUNTIME)
6506 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6507 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6508 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6512 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6513 if (TARGET_SOM || !TARGET_GAS)
6515 xoperands[1] = gen_label_rtx ();
6516 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6517 targetm.asm_out.internal_label (asm_out_file, "L",
6518 CODE_LABEL_NUMBER (xoperands[1]));
6519 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6523 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6524 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6526 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6529 /* Now output a very long branch to the original target. */
6530 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6532 /* Now restore the value of %r1 in the delay slot. */
6535 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6536 return "ldd -16(%%r30),%%r1";
6538 return "ldd -40(%%r30),%%r1";
6542 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6543 return "ldw -20(%%r30),%%r1";
6545 return "ldw -12(%%r30),%%r1";
6549 /* This routine handles all the branch-on-bit conditional branch sequences we
6550 might need to generate. It handles nullification of delay slots,
6551 varying length branches, negated branches and all combinations of the
6552 above. It returns the appropriate output template to emit the branch. */
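/* As a rough guide to the templates assembled below (descriptive only,
   not a normative statement of the ISA): the "bb" forms branch directly
   on a fixed bit of a general register, while the "{extrs,|extrw,s,}"
   and "extrd,s" forms extract that bit and nullify the following
   instruction, which is how the forward skip over a single nullified
   insn mentioned above is realized.  */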
6555 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6557 static char buf[100];
6559 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6560 int length = get_attr_length (insn);
6563 /* A conditional branch to the following instruction (e.g. the delay slot) is
6564 asking for a disaster. I do not think this can happen as this pattern
6565 is only used when optimizing; jump optimization should eliminate the
6566 jump. But be prepared just in case. */
6568 if (branch_to_delay_slot_p (insn))
6571 /* If this is a long branch with its delay slot unfilled, set `nullify'
6572 as it can nullify the delay slot and save a nop. */
6573 if (length == 8 && dbr_sequence_length () == 0)
6576 /* If this is a short forward conditional branch which did not get
6577 its delay slot filled, the delay slot can still be nullified. */
6578 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6579 nullify = forward_branch_p (insn);
6581 /* A forward branch over a single nullified insn can be done with an
6582 extrs instruction. This avoids a single cycle penalty due to
6583 mis-predicted branch if we fall through (branch not taken). */
6586 && next_real_insn (insn) != 0
6587 && get_attr_length (next_real_insn (insn)) == 4
6588 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6595 /* All short conditional branches except backwards with an unfilled delay slot. */
6599 strcpy (buf, "{extrs,|extrw,s,}");
6601 strcpy (buf, "bb,");
6602 if (useskip && GET_MODE (operands[0]) == DImode)
6603 strcpy (buf, "extrd,s,*");
6604 else if (GET_MODE (operands[0]) == DImode)
6605 strcpy (buf, "bb,*");
6606 if ((which == 0 && negated)
6607 || (which == 1 && ! negated))
6612 strcat (buf, " %0,%1,1,%%r0");
6613 else if (nullify && negated)
6615 if (branch_needs_nop_p (insn))
6616 strcat (buf, ",n %0,%1,%3%#");
6618 strcat (buf, ",n %0,%1,%3");
6620 else if (nullify && ! negated)
6622 if (branch_needs_nop_p (insn))
6623 strcat (buf, ",n %0,%1,%2%#");
6625 strcat (buf, ",n %0,%1,%2");
6627 else if (! nullify && negated)
6628 strcat (buf, " %0,%1,%3");
6629 else if (! nullify && ! negated)
6630 strcat (buf, " %0,%1,%2");
6633 /* All long conditionals. Note a short backward branch with an
6634 unfilled delay slot is treated just like a long backward branch
6635 with an unfilled delay slot. */
6637 /* Handle weird backwards branch with a filled delay slot
6638 which is nullified. */
6639 if (dbr_sequence_length () != 0
6640 && ! forward_branch_p (insn)
6643 strcpy (buf, "bb,");
6644 if (GET_MODE (operands[0]) == DImode)
6646 if ((which == 0 && negated)
6647 || (which == 1 && ! negated))
6652 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6654 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6656 /* Handle short backwards branch with an unfilled delay slot.
6657 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6658 taken and untaken branches. */
6659 else if (dbr_sequence_length () == 0
6660 && ! forward_branch_p (insn)
6661 && INSN_ADDRESSES_SET_P ()
6662 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6663 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6665 strcpy (buf, "bb,");
6666 if (GET_MODE (operands[0]) == DImode)
6668 if ((which == 0 && negated)
6669 || (which == 1 && ! negated))
6674 strcat (buf, " %0,%1,%3%#");
6676 strcat (buf, " %0,%1,%2%#");
6680 if (GET_MODE (operands[0]) == DImode)
6681 strcpy (buf, "extrd,s,*");
6683 strcpy (buf, "{extrs,|extrw,s,}");
6684 if ((which == 0 && negated)
6685 || (which == 1 && ! negated))
6689 if (nullify && negated)
6690 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6691 else if (nullify && ! negated)
6692 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6694 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6696 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6701 /* The reversed conditional branch must branch over one additional
6702 instruction if the delay slot is filled and needs to be extracted
6703 by output_lbranch. If the delay slot is empty or this is a
6704 nullified forward branch, the instruction after the reversed
6705 condition branch must be nullified. */
6706 if (dbr_sequence_length () == 0
6707 || (nullify && forward_branch_p (insn)))
6711 operands[4] = GEN_INT (length);
6716 operands[4] = GEN_INT (length + 4);
6719 if (GET_MODE (operands[0]) == DImode)
6720 strcpy (buf, "bb,*");
6722 strcpy (buf, "bb,");
6723 if ((which == 0 && negated)
6724 || (which == 1 && !negated))
6729 strcat (buf, ",n %0,%1,.+%4");
6731 strcat (buf, " %0,%1,.+%4");
6732 output_asm_insn (buf, operands);
6733 return output_lbranch (negated ? operands[3] : operands[2],
6739 /* This routine handles all the branch-on-variable-bit conditional branch
6740 sequences we might need to generate. It handles nullification of delay
6741 slots, varying length branches, negated branches and all combinations
6742 of the above. It returns the appropriate output template to emit the branch. */
6746 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6748 static char buf[100];
6750 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6751 int length = get_attr_length (insn);
6754 /* A conditional branch to the following instruction (e.g. the delay slot) is
6755 asking for a disaster. I do not think this can happen as this pattern
6756 is only used when optimizing; jump optimization should eliminate the
6757 jump. But be prepared just in case. */
6759 if (branch_to_delay_slot_p (insn))
6762 /* If this is a long branch with its delay slot unfilled, set `nullify'
6763 as it can nullify the delay slot and save a nop. */
6764 if (length == 8 && dbr_sequence_length () == 0)
6767 /* If this is a short forward conditional branch which did not get
6768 its delay slot filled, the delay slot can still be nullified. */
6769 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6770 nullify = forward_branch_p (insn);
6772 /* A forward branch over a single nullified insn can be done with an
6773 extrs instruction. This avoids a single cycle penalty due to
6774 mis-predicted branch if we fall through (branch not taken). */
6777 && next_real_insn (insn) != 0
6778 && get_attr_length (next_real_insn (insn)) == 4
6779 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6786 /* All short conditional branches except backwards with an unfilled delay slot. */
6790 strcpy (buf, "{vextrs,|extrw,s,}");
6792 strcpy (buf, "{bvb,|bb,}");
6793 if (useskip && GET_MODE (operands[0]) == DImode)
6794 strcpy (buf, "extrd,s,*");
6795 else if (GET_MODE (operands[0]) == DImode)
6796 strcpy (buf, "bb,*");
6797 if ((which == 0 && negated)
6798 || (which == 1 && ! negated))
6803 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6804 else if (nullify && negated)
6806 if (branch_needs_nop_p (insn))
6807 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6809 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6811 else if (nullify && ! negated)
6813 if (branch_needs_nop_p (insn))
6814 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6816 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6818 else if (! nullify && negated)
6819 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6820 else if (! nullify && ! negated)
6821 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6824 /* All long conditionals. Note a short backward branch with an
6825 unfilled delay slot is treated just like a long backward branch
6826 with an unfilled delay slot. */
6828 /* Handle weird backwards branch with a filled delay slot
6829 which is nullified. */
6830 if (dbr_sequence_length () != 0
6831 && ! forward_branch_p (insn)
6834 strcpy (buf, "{bvb,|bb,}");
6835 if (GET_MODE (operands[0]) == DImode)
6837 if ((which == 0 && negated)
6838 || (which == 1 && ! negated))
6843 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6845 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6847 /* Handle short backwards branch with an unfilled delay slot.
6848 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6849 taken and untaken branches. */
6850 else if (dbr_sequence_length () == 0
6851 && ! forward_branch_p (insn)
6852 && INSN_ADDRESSES_SET_P ()
6853 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6854 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6856 strcpy (buf, "{bvb,|bb,}");
6857 if (GET_MODE (operands[0]) == DImode)
6859 if ((which == 0 && negated)
6860 || (which == 1 && ! negated))
6865 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6867 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6871 strcpy (buf, "{vextrs,|extrw,s,}");
6872 if (GET_MODE (operands[0]) == DImode)
6873 strcpy (buf, "extrd,s,*");
6874 if ((which == 0 && negated)
6875 || (which == 1 && ! negated))
6879 if (nullify && negated)
6880 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6881 else if (nullify && ! negated)
6882 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6884 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6886 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6891 /* The reversed conditional branch must branch over one additional
6892 instruction if the delay slot is filled and needs to be extracted
6893 by output_lbranch. If the delay slot is empty or this is a
6894 nullified forward branch, the instruction after the reversed
6895 condition branch must be nullified. */
6896 if (dbr_sequence_length () == 0
6897 || (nullify && forward_branch_p (insn)))
6901 operands[4] = GEN_INT (length);
6906 operands[4] = GEN_INT (length + 4);
6909 if (GET_MODE (operands[0]) == DImode)
6910 strcpy (buf, "bb,*");
6912 strcpy (buf, "{bvb,|bb,}");
6913 if ((which == 0 && negated)
6914 || (which == 1 && !negated))
6919 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6921 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6922 output_asm_insn (buf, operands);
6923 return output_lbranch (negated ? operands[3] : operands[2],
6929 /* Return the output template for emitting a dbra type insn.
6931 Note it may perform some output operations on its own before
6932 returning the final output string. */
6934 output_dbra (rtx *operands, rtx insn, int which_alternative)
6936 int length = get_attr_length (insn);
6938 /* A conditional branch to the following instruction (e.g. the delay slot) is
6939 asking for a disaster. Be prepared! */
6941 if (branch_to_delay_slot_p (insn))
6943 if (which_alternative == 0)
6944 return "ldo %1(%0),%0";
6945 else if (which_alternative == 1)
6947 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6948 output_asm_insn ("ldw -16(%%r30),%4", operands);
6949 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6950 return "{fldws|fldw} -16(%%r30),%0";
6954 output_asm_insn ("ldw %0,%4", operands);
6955 return "ldo %1(%4),%4\n\tstw %4,%0";
6959 if (which_alternative == 0)
6961 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6964 /* If this is a long branch with its delay slot unfilled, set `nullify'
6965 as it can nullify the delay slot and save a nop. */
6966 if (length == 8 && dbr_sequence_length () == 0)
6969 /* If this is a short forward conditional branch which did not get
6970 its delay slot filled, the delay slot can still be nullified. */
6971 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6972 nullify = forward_branch_p (insn);
6979 if (branch_needs_nop_p (insn))
6980 return "addib,%C2,n %1,%0,%3%#";
6982 return "addib,%C2,n %1,%0,%3";
6985 return "addib,%C2 %1,%0,%3";
6988 /* Handle weird backwards branch with a filled delay slot
6989 which is nullified. */
6990 if (dbr_sequence_length () != 0
6991 && ! forward_branch_p (insn)
6993 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6994 /* Handle short backwards branch with an unfilled delay slot.
6996 Using an addb;nop rather than addi;bl saves 1 cycle for both
6996 taken and untaken branches. */
6997 else if (dbr_sequence_length () == 0
6998 && ! forward_branch_p (insn)
6999 && INSN_ADDRESSES_SET_P ()
7000 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7001 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7002 return "addib,%C2 %1,%0,%3%#";
7004 /* Handle normal cases. */
7006 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7008 return "addi,%N2 %1,%0,%0\n\tb %3";
7011 /* The reversed conditional branch must branch over one additional
7012 instruction if the delay slot is filled and needs to be extracted
7013 by output_lbranch. If the delay slot is empty or this is a
7014 nullified forward branch, the instruction after the reversed
7015 condition branch must be nullified. */
7016 if (dbr_sequence_length () == 0
7017 || (nullify && forward_branch_p (insn)))
7021 operands[4] = GEN_INT (length);
7026 operands[4] = GEN_INT (length + 4);
7030 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7032 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7034 return output_lbranch (operands[3], insn, xdelay);
7038 /* Deal with gross reload from FP register case. */
7039 else if (which_alternative == 1)
7041 /* Move loop counter from FP register to MEM then into a GR,
7042 increment the GR, store the GR into MEM, and finally reload
7043 the FP register from MEM from within the branch's delay slot. */
7044 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7046 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7048 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7049 else if (length == 28)
7050 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7053 operands[5] = GEN_INT (length - 16);
7054 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7055 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7056 return output_lbranch (operands[3], insn, 0);
7059 /* Deal with gross reload from memory case. */
7062 /* Reload loop counter from memory, the store back to memory
7063 happens in the branch's delay slot. */
7064 output_asm_insn ("ldw %0,%4", operands);
7066 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7067 else if (length == 16)
7068 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7071 operands[5] = GEN_INT (length - 4);
7072 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7073 return output_lbranch (operands[3], insn, 0);
7078 /* Return the output template for emitting a movb type insn.
7080 Note it may perform some output operations on its own before
7081 returning the final output string. */
7083 output_movb (rtx *operands, rtx insn, int which_alternative,
7084 int reverse_comparison)
7086 int length = get_attr_length (insn);
7088 /* A conditional branch to the following instruction (e.g. the delay slot) is
7089 asking for a disaster. Be prepared! */
7091 if (branch_to_delay_slot_p (insn))
7093 if (which_alternative == 0)
7094 return "copy %1,%0";
7095 else if (which_alternative == 1)
7097 output_asm_insn ("stw %1,-16(%%r30)", operands);
7098 return "{fldws|fldw} -16(%%r30),%0";
7100 else if (which_alternative == 2)
7106 /* Support the second variant. */
7107 if (reverse_comparison)
7108 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7110 if (which_alternative == 0)
7112 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7115 /* If this is a long branch with its delay slot unfilled, set `nullify'
7116 as it can nullify the delay slot and save a nop. */
7117 if (length == 8 && dbr_sequence_length () == 0)
7120 /* If this is a short forward conditional branch which did not get
7121 its delay slot filled, the delay slot can still be nullified. */
7122 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7123 nullify = forward_branch_p (insn);
7130 if (branch_needs_nop_p (insn))
7131 return "movb,%C2,n %1,%0,%3%#";
7133 return "movb,%C2,n %1,%0,%3";
7136 return "movb,%C2 %1,%0,%3";
7139 /* Handle weird backwards branch with a filled delay slot
7140 which is nullified. */
7141 if (dbr_sequence_length () != 0
7142 && ! forward_branch_p (insn)
7144 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7146 /* Handle short backwards branch with an unfilled delay slot.
7147 Using a movb;nop rather than or;bl saves 1 cycle for both
7148 taken and untaken branches. */
7149 else if (dbr_sequence_length () == 0
7150 && ! forward_branch_p (insn)
7151 && INSN_ADDRESSES_SET_P ()
7152 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7153 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7154 return "movb,%C2 %1,%0,%3%#";
7155 /* Handle normal cases. */
7157 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7159 return "or,%N2 %1,%%r0,%0\n\tb %3";
7162 /* The reversed conditional branch must branch over one additional
7163 instruction if the delay slot is filled and needs to be extracted
7164 by output_lbranch. If the delay slot is empty or this is a
7165 nullified forward branch, the instruction after the reversed
7166 condition branch must be nullified. */
7167 if (dbr_sequence_length () == 0
7168 || (nullify && forward_branch_p (insn)))
7172 operands[4] = GEN_INT (length);
7177 operands[4] = GEN_INT (length + 4);
7181 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7183 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7185 return output_lbranch (operands[3], insn, xdelay);
7188 /* Deal with gross reload for FP destination register case. */
7189 else if (which_alternative == 1)
7191 /* Move source register to MEM, perform the branch test, then
7192 finally load the FP register from MEM from within the branch's delay slot. */
7194 output_asm_insn ("stw %1,-16(%%r30)", operands);
7196 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7197 else if (length == 16)
7198 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7201 operands[4] = GEN_INT (length - 4);
7202 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7203 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7204 return output_lbranch (operands[3], insn, 0);
7207 /* Deal with gross reload from memory case. */
7208 else if (which_alternative == 2)
7210 /* Reload loop counter from memory, the store back to memory
7211 happens in the branch's delay slot. */
7213 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7214 else if (length == 12)
7215 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7218 operands[4] = GEN_INT (length);
7219 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7221 return output_lbranch (operands[3], insn, 0);
7224 /* Handle SAR as a destination. */
7228 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7229 else if (length == 12)
7230 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7233 operands[4] = GEN_INT (length);
7234 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7236 return output_lbranch (operands[3], insn, 0);
7241 /* Copy any FP arguments in INSN into integer registers. */
7243 copy_fp_args (rtx insn)
7248 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7250 int arg_mode, regno;
7251 rtx use = XEXP (link, 0);
7253 if (! (GET_CODE (use) == USE
7254 && GET_CODE (XEXP (use, 0)) == REG
7255 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7258 arg_mode = GET_MODE (XEXP (use, 0));
7259 regno = REGNO (XEXP (use, 0));
7261 /* Is it a floating point register? */
7262 if (regno >= 32 && regno <= 39)
7264 /* Copy the FP register into an integer register via memory. */
7265 if (arg_mode == SFmode)
7267 xoperands[0] = XEXP (use, 0);
7268 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7269 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7270 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7274 xoperands[0] = XEXP (use, 0);
7275 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7276 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7277 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7278 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7284 /* Compute length of the FP argument copy sequence for INSN. */
7286 length_fp_args (rtx insn)
7291 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7293 int arg_mode, regno;
7294 rtx use = XEXP (link, 0);
7296 if (! (GET_CODE (use) == USE
7297 && GET_CODE (XEXP (use, 0)) == REG
7298 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7301 arg_mode = GET_MODE (XEXP (use, 0));
7302 regno = REGNO (XEXP (use, 0));
7304 /* Is it a floating point register? */
7305 if (regno >= 32 && regno <= 39)
7307 if (arg_mode == SFmode)
7317 /* Return the attribute length for the millicode call instruction INSN.
7318 The length must match the code generated by output_millicode_call.
7319 We include the delay slot in the returned length as it is better to
7320 overestimate the length than to underestimate it. */
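/* Note on the distance cutoffs used below: 240000 and 7600000 appear to
   be conservative approximations of the reach of the 17-bit bl
   displacement (about +/-256 kB) and of the 22-bit PA 2.0 b,l
   displacement (about +/-8 MB), leaving slack for stubs and for code
   growth after lengths are computed.  */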
7323 attr_length_millicode_call (rtx insn)
7325 unsigned long distance = -1;
7326 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7328 if (INSN_ADDRESSES_SET_P ())
7330 distance = (total + insn_current_reference_address (insn));
7331 if (distance < total)
7337 if (!TARGET_LONG_CALLS && distance < 7600000)
7342 else if (TARGET_PORTABLE_RUNTIME)
7346 if (!TARGET_LONG_CALLS && distance < 240000)
7349 if (TARGET_LONG_ABS_CALL && !flag_pic)
7356 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7359 CALL_DEST is the routine we are calling. */
7362 output_millicode_call (rtx insn, rtx call_dest)
7364 int attr_length = get_attr_length (insn);
7365 int seq_length = dbr_sequence_length ();
7370 xoperands[0] = call_dest;
7371 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7373 /* Handle the common case where we are sure that the branch will
7374 reach the beginning of the $CODE$ subspace. The within reach
7375 form of the $$sh_func_adrs call has a length of 28. Because
7376 it has an attribute type of multi, it never has a nonzero
7377 sequence length. The length of the $$sh_func_adrs is the same
7378 as certain out of reach PIC calls to other routines. */
7379 if (!TARGET_LONG_CALLS
7380 && ((seq_length == 0
7381 && (attr_length == 12
7382 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7383 || (seq_length != 0 && attr_length == 8)))
7385 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7391 /* It might seem that one insn could be saved by accessing
7392 the millicode function using the linkage table. However,
7393 this doesn't work in shared libraries and other dynamically
7394 loaded objects. Using a pc-relative sequence also avoids
7395 problems related to the implicit use of the gp register. */
7396 output_asm_insn ("b,l .+8,%%r1", xoperands);
7400 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7401 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7405 xoperands[1] = gen_label_rtx ();
7406 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7407 targetm.asm_out.internal_label (asm_out_file, "L",
7408 CODE_LABEL_NUMBER (xoperands[1]));
7409 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7412 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7414 else if (TARGET_PORTABLE_RUNTIME)
7416 /* Pure portable runtime doesn't allow be/ble; we also don't
7417 have PIC support in the assembler/linker, so this sequence is far longer. */
7420 /* Get the address of our target into %r1. */
7421 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7422 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7424 /* Get our return address into %r31. */
7425 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7426 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7428 /* Jump to our target address in %r1. */
7429 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7433 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7435 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7437 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7441 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7442 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7444 if (TARGET_SOM || !TARGET_GAS)
7446 /* The HP assembler can generate relocations for the
7447 difference of two symbols. GAS can do this for a
7448 millicode symbol but not an arbitrary external
7449 symbol when generating SOM output. */
7450 xoperands[1] = gen_label_rtx ();
7451 targetm.asm_out.internal_label (asm_out_file, "L",
7452 CODE_LABEL_NUMBER (xoperands[1]));
7453 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7454 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7458 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7459 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7463 /* Jump to our target address in %r1. */
7464 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7468 if (seq_length == 0)
7469 output_asm_insn ("nop", xoperands);
7471 /* We are done if there isn't a jump in the delay slot. */
7472 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7475 /* This call has an unconditional jump in its delay slot. */
7476 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7478 /* See if the return address can be adjusted. Use the containing
7479 sequence insn's address. */
7480 if (INSN_ADDRESSES_SET_P ())
7482 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7483 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7484 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7486 if (VAL_14_BITS_P (distance))
7488 xoperands[1] = gen_label_rtx ();
7489 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7490 targetm.asm_out.internal_label (asm_out_file, "L",
7491 CODE_LABEL_NUMBER (xoperands[1]));
7494 /* ??? This branch may not reach its target. */
7495 output_asm_insn ("nop\n\tb,n %0", xoperands);
7498 /* ??? This branch may not reach its target. */
7499 output_asm_insn ("nop\n\tb,n %0", xoperands);
7501 /* Delete the jump. */
7502 SET_INSN_DELETED (NEXT_INSN (insn));
7507 /* Return the attribute length of the call instruction INSN. The SIBCALL
7508 flag indicates whether INSN is a regular call or a sibling call. The
7509 length returned must be longer than the code actually generated by
7510 output_call. Since branch shortening is done before delay branch
7511 sequencing, there is no way to determine whether or not the delay
7512 slot will be filled during branch shortening. Even when the delay
7513 slot is filled, we may have to add a nop if the delay slot contains
7514 a branch that can't reach its target. Thus, we always have to include
7515 the delay slot in the length estimate. This used to be done in
7516 pa_adjust_insn_length but we do it here now as some sequences always
7517 fill the delay slot and we can save four bytes in the estimate for these sequences. */
7521 attr_length_call (rtx insn, int sibcall)
7524 rtx call, call_dest;
7527 rtx pat = PATTERN (insn);
7528 unsigned long distance = -1;
7530 gcc_assert (GET_CODE (insn) == CALL_INSN);
7532 if (INSN_ADDRESSES_SET_P ())
7534 unsigned long total;
7536 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7537 distance = (total + insn_current_reference_address (insn));
7538 if (distance < total)
7542 gcc_assert (GET_CODE (pat) == PARALLEL);
7544 /* Get the call rtx. */
7545 call = XVECEXP (pat, 0, 0);
7546 if (GET_CODE (call) == SET)
7547 call = SET_SRC (call);
7549 gcc_assert (GET_CODE (call) == CALL);
7551 /* Determine if this is a local call. */
7552 call_dest = XEXP (XEXP (call, 0), 0);
7553 call_decl = SYMBOL_REF_DECL (call_dest);
7554 local_call = call_decl && targetm.binds_local_p (call_decl);
7556 /* pc-relative branch. */
7557 if (!TARGET_LONG_CALLS
7558 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7559 || distance < 240000))
7562 /* 64-bit plabel sequence. */
7563 else if (TARGET_64BIT && !local_call)
7564 length += sibcall ? 28 : 24;
7566 /* non-pic long absolute branch sequence. */
7567 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7570 /* long pc-relative branch sequence. */
7571 else if (TARGET_LONG_PIC_SDIFF_CALL
7572 || (TARGET_GAS && !TARGET_SOM
7573 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7577 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7581 /* 32-bit plabel sequence. */
7587 length += length_fp_args (insn);
7597 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7605 /* INSN is a function call. It may have an unconditional jump in its delay slot.
7608 CALL_DEST is the routine we are calling. */
7611 output_call (rtx insn, rtx call_dest, int sibcall)
7613 int delay_insn_deleted = 0;
7614 int delay_slot_filled = 0;
7615 int seq_length = dbr_sequence_length ();
7616 tree call_decl = SYMBOL_REF_DECL (call_dest);
7617 int local_call = call_decl && targetm.binds_local_p (call_decl);
7620 xoperands[0] = call_dest;
7622 /* Handle the common case where we're sure that the branch will reach
7623 the beginning of the "$CODE$" subspace. This is the beginning of
7624 the current function if we are in a named section. */
7625 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7627 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7628 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7632 if (TARGET_64BIT && !local_call)
7634 /* ??? As far as I can tell, the HP linker doesn't support the
7635 long pc-relative sequence described in the 64-bit runtime
7636 architecture. So, we use a slightly longer indirect call. */
7637 xoperands[0] = get_deferred_plabel (call_dest);
7638 xoperands[1] = gen_label_rtx ();
7640 /* If this isn't a sibcall, we put the load of %r27 into the
7641 delay slot. We can't do this in a sibcall as we don't
7642 have a second call-clobbered scratch register available. */
7644 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7647 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7650 /* Now delete the delay insn. */
7651 SET_INSN_DELETED (NEXT_INSN (insn));
7652 delay_insn_deleted = 1;
7655 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7656 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7657 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7661 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7662 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7663 output_asm_insn ("bve (%%r1)", xoperands);
7667 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7668 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7669 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7670 delay_slot_filled = 1;
7675 int indirect_call = 0;
7677 /* Emit a long call. There are several different sequences
7678 of increasing length and complexity. In most cases,
7679 they don't allow an instruction in the delay slot. */
7680 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7681 && !TARGET_LONG_PIC_SDIFF_CALL
7682 && !(TARGET_GAS && !TARGET_SOM
7683 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7688 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7692 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7694 /* A non-jump insn in the delay slot. By definition we can
7695 emit this insn before the call (and in fact before argument
7697 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7700 /* Now delete the delay insn. */
7701 SET_INSN_DELETED (NEXT_INSN (insn));
7702 delay_insn_deleted = 1;
7705 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7707 /* This is the best sequence for making long calls in
7708 non-pic code. Unfortunately, GNU ld doesn't provide
7709 the stub needed for external calls, and GAS's support
7710 for this with the SOM linker is buggy. It is safe
7711 to use this for local calls. */
7712 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7714 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7718 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7721 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7723 output_asm_insn ("copy %%r31,%%r2", xoperands);
7724 delay_slot_filled = 1;
7729 if (TARGET_LONG_PIC_SDIFF_CALL)
7731 /* The HP assembler and linker can handle relocations
7732 for the difference of two symbols. The HP assembler
7733 recognizes the sequence as a pc-relative call and
7734 the linker provides stubs when needed. */
7735 xoperands[1] = gen_label_rtx ();
7736 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7737 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7738 targetm.asm_out.internal_label (asm_out_file, "L",
7739 CODE_LABEL_NUMBER (xoperands[1]));
7740 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7742 else if (TARGET_GAS && !TARGET_SOM
7743 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7745 /* GAS currently can't generate the relocations that
7746 are needed for the SOM linker under HP-UX using this
7747 sequence. The GNU linker doesn't generate the stubs
7748 that are needed for external calls on TARGET_ELF32
7749 with this sequence. For now, we have to use a
7750 longer plabel sequence when using GAS. */
7751 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7752 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7754 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7759 /* Emit a long plabel-based call sequence. This is
7760 essentially an inline implementation of $$dyncall.
7761 We don't actually try to call $$dyncall as this is
7762 as difficult as calling the function itself. */
7763 xoperands[0] = get_deferred_plabel (call_dest);
7764 xoperands[1] = gen_label_rtx ();
7766 /* Since the call is indirect, FP arguments in registers
7767 need to be copied to the general registers. Then, the
7768 argument relocation stub will copy them back. */
7770 copy_fp_args (insn);
7774 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7775 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7776 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7780 output_asm_insn ("addil LR'%0-$global$,%%r27",
7782 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7786 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7787 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7788 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7789 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7791 if (!sibcall && !TARGET_PA_20)
7793 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7794 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7795 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7797 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7804 output_asm_insn ("bve (%%r1)", xoperands);
7809 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7810 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7811 delay_slot_filled = 1;
7814 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7819 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7820 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7825 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7826 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7828 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7832 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7833 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7835 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7838 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7840 output_asm_insn ("copy %%r31,%%r2", xoperands);
7841 delay_slot_filled = 1;
7848 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7849 output_asm_insn ("nop", xoperands);
7851 /* We are done if there isn't a jump in the delay slot. */
7853 || delay_insn_deleted
7854 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7857 /* A sibcall should never have a branch in the delay slot. */
7858 gcc_assert (!sibcall);
7860 /* This call has an unconditional jump in its delay slot. */
7861 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7863 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7865 /* See if the return address can be adjusted. Use the containing
7866 sequence insn's address. This would break the regular call/return
7867 relationship assumed by the table based eh unwinder, so only do that
7868 if the call is not possibly throwing. */
7869 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7870 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7871 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7873 if (VAL_14_BITS_P (distance)
7874 && !(can_throw_internal (insn) || can_throw_external (insn)))
7876 xoperands[1] = gen_label_rtx ();
7877 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7878 targetm.asm_out.internal_label (asm_out_file, "L",
7879 CODE_LABEL_NUMBER (xoperands[1]));
7882 output_asm_insn ("nop\n\tb,n %0", xoperands);
7885 output_asm_insn ("b,n %0", xoperands);
7887 /* Delete the jump. */
7888 SET_INSN_DELETED (NEXT_INSN (insn));
7893 /* Return the attribute length of the indirect call instruction INSN.
7894 The length must match the code generated by output_indirect_call.
7895 The returned length includes the delay slot. Currently, the delay
7896 slot of an indirect call sequence is not exposed and it is used by
7897 the sequence itself. */
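/* For orientation (inferred from the templates in output_indirect_call
   below): fast indirect calls use a two-insn ble/copy through %r22; an
   attribute length of 8 corresponds to a direct bl/b,l or ble to
   $$dyncall, 12 to the long absolute ldil/ble form, 20 to the portable
   runtime form, and anything longer to the long PIC sequence.  */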
7900 attr_length_indirect_call (rtx insn)
7902 unsigned long distance = -1;
7903 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7905 if (INSN_ADDRESSES_SET_P ())
7907 distance = (total + insn_current_reference_address (insn));
7908 if (distance < total)
7915 if (TARGET_FAST_INDIRECT_CALLS
7916 || (!TARGET_PORTABLE_RUNTIME
7917 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7918 || distance < 240000)))
7924 if (TARGET_PORTABLE_RUNTIME)
7927 /* Out of reach, can use ble. */
7932 output_indirect_call (rtx insn, rtx call_dest)
7938 xoperands[0] = call_dest;
7939 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7940 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7944 /* First the special case for kernels, level 0 systems, etc. */
7945 if (TARGET_FAST_INDIRECT_CALLS)
7946 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7948 /* Now the normal case -- we can reach $$dyncall directly or
7949 we're sure that we can get there via a long-branch stub.
7951 No need to check target flags as the length uniquely identifies
7952 the remaining cases. */
7953 if (attr_length_indirect_call (insn) == 8)
7955 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7956 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7957 variant of the B,L instruction can't be used on the SOM target. */
7958 if (TARGET_PA_20 && !TARGET_SOM)
7959 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7961 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7964 /* Long millicode call, but we are not generating PIC or portable runtime code. */
7966 if (attr_length_indirect_call (insn) == 12)
7967 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7969 /* Long millicode call for portable runtime. */
7970 if (attr_length_indirect_call (insn) == 20)
7971 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7973 /* We need a long PIC call to $$dyncall. */
7974 xoperands[0] = NULL_RTX;
7975 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7976 if (TARGET_SOM || !TARGET_GAS)
7978 xoperands[0] = gen_label_rtx ();
7979 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7980 targetm.asm_out.internal_label (asm_out_file, "L",
7981 CODE_LABEL_NUMBER (xoperands[0]));
7982 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7986 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7987 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7990 output_asm_insn ("blr %%r0,%%r2", xoperands);
7991 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7995 /* Return the total length of the save and restore instructions needed for
7996 the data linkage table pointer (i.e., the PIC register) across the call
7997 instruction INSN. No-return calls do not require a save and restore.
7998 In addition, we may be able to avoid the save and restore for calls
7999 within the same translation unit. */
8002 attr_length_save_restore_dltp (rtx insn)
8004 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8010 /* In HPUX 8.0's shared library scheme, special relocations are needed
8011 for function labels if they might be passed to a function
8012 in a shared library (because shared libraries don't live in code
8013 space), and special magic is needed to construct their address. */
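/* hppa_encode_label below flags such function labels (apparently by
   prefixing the assembler name; pa_strip_name_encoding further down
   undoes this by skipping a leading '@' and a possible '*').  */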
8016 hppa_encode_label (rtx sym)
8018 const char *str = XSTR (sym, 0);
8019 int len = strlen (str) + 1;
8022 p = newstr = XALLOCAVEC (char, len + 1);
8026 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8030 pa_encode_section_info (tree decl, rtx rtl, int first)
8032 int old_referenced = 0;
8034 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8036 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8038 default_encode_section_info (decl, rtl, first);
8040 if (first && TEXT_SPACE_P (decl))
8042 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8043 if (TREE_CODE (decl) == FUNCTION_DECL)
8044 hppa_encode_label (XEXP (rtl, 0));
8046 else if (old_referenced)
8047 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8050 /* This is sort of the inverse of pa_encode_section_info. */
8053 pa_strip_name_encoding (const char *str)
8055 str += (*str == '@');
8056 str += (*str == '*');
8061 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8063 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
8066 /* Returns 1 if OP is a function label involved in a simple addition
8067 with a constant. Used to keep certain patterns from matching
8068 during instruction combination. */
8070 is_function_label_plus_const (rtx op)
8072 /* Strip off any CONST. */
8073 if (GET_CODE (op) == CONST)
8076 return (GET_CODE (op) == PLUS
8077 && function_label_operand (XEXP (op, 0), Pmode)
8078 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8081 /* Output assembly code for a thunk to FUNCTION. */
8084 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8085 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8088 static unsigned int current_thunk_number;
8089 int val_14 = VAL_14_BITS_P (delta);
8090 unsigned int old_last_address = last_address, nbytes = 0;
8094 xoperands[0] = XEXP (DECL_RTL (function), 0);
8095 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8096 xoperands[2] = GEN_INT (delta);
8098 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8099 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8101 /* Output the thunk. We know that the function is in the same
8102 translation unit (i.e., the same space) as the thunk, and that
8103 thunks are output after their method. Thus, we don't need an
8104 external branch to reach the function. With SOM and GAS,
8105 functions and thunks are effectively in different sections.
8106 Thus, we can always use an IA-relative branch and the linker
8107 will add a long branch stub if necessary.
8109 However, we have to be careful when generating PIC code on the
8110 SOM port to ensure that the sequence does not transfer to an
8111 import stub for the target function as this could clobber the
8112 return value saved at SP-24. This would also apply to the
8113 32-bit linux port if the multi-space model is implemented. */
8114 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8115 && !(flag_pic && TREE_PUBLIC (function))
8116 && (TARGET_GAS || last_address < 262132))
8117 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8118 && ((targetm.have_named_sections
8119 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8120 /* The GNU 64-bit linker has rather poor stub management.
8121 So, we use a long branch from thunks that aren't in
8122 the same section as the target function. */
8124 && (DECL_SECTION_NAME (thunk_fndecl)
8125 != DECL_SECTION_NAME (function)))
8126 || ((DECL_SECTION_NAME (thunk_fndecl)
8127 == DECL_SECTION_NAME (function))
8128 && last_address < 262132)))
8129 || (targetm.have_named_sections
8130 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8131 && DECL_SECTION_NAME (function) == NULL
8132 && last_address < 262132)
8133 || (!targetm.have_named_sections && last_address < 262132))))
8136 output_asm_insn ("addil L'%2,%%r26", xoperands);
8138 output_asm_insn ("b %0", xoperands);
8142 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8147 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8151 else if (TARGET_64BIT)
8153 /* We only have one call-clobbered scratch register, so we can't
8154 make use of the delay slot if delta doesn't fit in 14 bits. */
8157 output_asm_insn ("addil L'%2,%%r26", xoperands);
8158 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8161 output_asm_insn ("b,l .+8,%%r1", xoperands);
8165 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8166 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8170 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8171 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8176 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8177 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8182 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8186 else if (TARGET_PORTABLE_RUNTIME)
8188 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8189 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8192 output_asm_insn ("addil L'%2,%%r26", xoperands);
8194 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8198 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8203 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8207 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8209 /* The function is accessible from outside this module. The only
8210 way to avoid an import stub between the thunk and function is to
8211 call the function directly with an indirect sequence similar to
8212 that used by $$dyncall. This is possible because $$dyncall acts
8213 as the import stub in an indirect call. */
8214 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8215 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8216 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8217 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8218 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8219 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8220 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8221 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8222 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8226 output_asm_insn ("addil L'%2,%%r26", xoperands);
8232 output_asm_insn ("bve (%%r22)", xoperands);
8235 else if (TARGET_NO_SPACE_REGS)
8237 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8242 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8243 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8244 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8249 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8251 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8255 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8257 if (TARGET_SOM || !TARGET_GAS)
8259 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8260 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8264 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8265 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8269 output_asm_insn ("addil L'%2,%%r26", xoperands);
8271 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8275 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8280 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8287 output_asm_insn ("addil L'%2,%%r26", xoperands);
8289 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8290 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8294 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8299 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8304 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8306 if (TARGET_SOM && TARGET_GAS)
8308 /* We're done with this subspace except possibly for some additional
8309 debug information. Forget that we are in this subspace to ensure
8310 that the next function is output in its own subspace. */
8312 cfun->machine->in_nsubspa = 2;
8315 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8317 switch_to_section (data_section);
8318 output_asm_insn (".align 4", xoperands);
8319 ASM_OUTPUT_LABEL (file, label);
8320 output_asm_insn (".word P'%0", xoperands);
8323 current_thunk_number++;
8324 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8325 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8326 last_address += nbytes;
8327 if (old_last_address > last_address)
8328 last_address = UINT_MAX;
8329 update_total_code_bytes (nbytes);
8332 /* Only direct calls to static functions are allowed to be sibling (tail) call optimizations.
8335 This restriction is necessary because some linker generated stubs will
8336 store return pointers into rp' in some cases which might clobber a
8337 live value already in rp'.
8339 In a sibcall the current function and the target function share stack
8340 space. Thus if the path to the current function and the path to the
8341 target function save a value in rp', they save the value into the
8342 same stack slot, which has undesirable consequences.
8344 Because of the deferred binding nature of shared libraries any function
8345 with external scope could be in a different load module and thus require
8346 rp' to be saved when calling that function. So sibcall optimizations
8347 can only be safe for static functions.
8349 Note that GCC never needs return value relocations, so we don't have to
8350 worry about static calls with return value relocations (which require
8353 It is safe to perform a sibcall optimization when the target function
8354 will never return. */
8356 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8358 if (TARGET_PORTABLE_RUNTIME)
8361 /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8362 single subspace mode and the call is not indirect. As far as I know,
8363 there is no operating system support for the multiple subspace mode.
8364 It might be possible to support indirect calls if we didn't use
8365 $$dyncall (see the indirect sequence generated in output_call). */
8367 return (decl != NULL_TREE);
8369 /* Sibcalls are not ok because the arg pointer register is not a fixed
8370 register. This prevents the sibcall optimization from occurring. In
8371 addition, there are problems with stub placement using GNU ld. This
8372 is because a normal sibcall branch uses a 17-bit relocation while
8373 a regular call branch uses a 22-bit relocation. As a result, more
8374 care needs to be taken in the placement of long-branch stubs. */
8378 /* Sibcalls are only ok within a translation unit. */
8379 return (decl && !TREE_PUBLIC (decl));
8382 /* ??? Addition is not commutative on the PA due to the weird implicit
8383 space register selection rules for memory addresses. Therefore, we
8384 don't consider a + b == b + a, as this might be inside a MEM. */
8386 pa_commutative_p (const_rtx x, int outer_code)
8388 return (COMMUTATIVE_P (x)
8389 && (TARGET_NO_SPACE_REGS
8390 || (outer_code != UNKNOWN && outer_code != MEM)
8391 || GET_CODE (x) != PLUS));
8394 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8395 use in fmpyadd instructions. */
8397 fmpyaddoperands (rtx *operands)
8399 enum machine_mode mode = GET_MODE (operands[0]);
8401 /* Must be a floating point mode. */
8402 if (mode != SFmode && mode != DFmode)
8405 /* All modes must be the same. */
8406 if (! (mode == GET_MODE (operands[1])
8407 && mode == GET_MODE (operands[2])
8408 && mode == GET_MODE (operands[3])
8409 && mode == GET_MODE (operands[4])
8410 && mode == GET_MODE (operands[5])))
8413 /* All operands must be registers. */
8414 if (! (GET_CODE (operands[1]) == REG
8415 && GET_CODE (operands[2]) == REG
8416 && GET_CODE (operands[3]) == REG
8417 && GET_CODE (operands[4]) == REG
8418 && GET_CODE (operands[5]) == REG))
8421 /* Only 2 real operands to the addition. One of the input operands must
8422 be the same as the output operand. */
8423 if (! rtx_equal_p (operands[3], operands[4])
8424 && ! rtx_equal_p (operands[3], operands[5]))
8427 /* Inout operand of add cannot conflict with any operands from multiply. */
8428 if (rtx_equal_p (operands[3], operands[0])
8429 || rtx_equal_p (operands[3], operands[1])
8430 || rtx_equal_p (operands[3], operands[2]))
8433 /* multiply cannot feed into addition operands. */
8434 if (rtx_equal_p (operands[4], operands[0])
8435 || rtx_equal_p (operands[5], operands[0]))
8438 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8440 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8441 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8442 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8443 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8444 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8445 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8448 /* Passed. Operands are suitable for fmpyadd. */
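/* Illustrative summary (registers are hypothetical, not from the
   original sources): operands[0-2] are the multiply (dest, src, src)
   and operands[3-5] are the add (dest, src, src).  For example,

	fr22 = fr24 * fr26
	fr28 = fr28 + fr30

   passes the checks above: the add reuses its destination as one of
   its sources, and neither operation reads the other's result.  */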
8452 #if !defined(USE_COLLECT2)
8454 pa_asm_out_constructor (rtx symbol, int priority)
8456 if (!function_label_operand (symbol, VOIDmode))
8457 hppa_encode_label (symbol);
8459 #ifdef CTORS_SECTION_ASM_OP
8460 default_ctor_section_asm_out_constructor (symbol, priority);
8462 # ifdef TARGET_ASM_NAMED_SECTION
8463 default_named_section_asm_out_constructor (symbol, priority);
8465 default_stabs_asm_out_constructor (symbol, priority);
8471 pa_asm_out_destructor (rtx symbol, int priority)
8473 if (!function_label_operand (symbol, VOIDmode))
8474 hppa_encode_label (symbol);
8476 #ifdef DTORS_SECTION_ASM_OP
8477 default_dtor_section_asm_out_destructor (symbol, priority);
8479 # ifdef TARGET_ASM_NAMED_SECTION
8480 default_named_section_asm_out_destructor (symbol, priority);
8482 default_stabs_asm_out_destructor (symbol, priority);
8488 /* This function places uninitialized global data in the bss section.
8489 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8490 function on the SOM port to prevent uninitialized global data from
8491 being placed in the data section. */
8494 pa_asm_output_aligned_bss (FILE *stream,
8496 unsigned HOST_WIDE_INT size,
8499 switch_to_section (bss_section);
8500 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8502 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8503 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8506 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8507 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8510 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8511 ASM_OUTPUT_LABEL (stream, name);
8512 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8515 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8516 that doesn't allow the alignment of global common storage to be directly
8517 specified. The SOM linker aligns common storage based on the rounded
8518 value of the NUM_BYTES parameter in the .comm directive. It's not
8519 possible to use the .align directive as it doesn't affect the alignment
8520 of the label associated with a .comm directive. */
8523 pa_asm_output_aligned_common (FILE *stream,
8525 unsigned HOST_WIDE_INT size,
8528 unsigned int max_common_align;
8530 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8531 if (align > max_common_align)
8533 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8534 "for global common data. Using %u",
8535 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8536 align = max_common_align;
8539 switch_to_section (bss_section);
8541 assemble_name (stream, name);
8542 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8543 MAX (size, align / BITS_PER_UNIT));
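/* For illustration only (hypothetical symbol, not from the original
   sources): a 16-byte object "buf" with a requested alignment of
   64 bits results in roughly

	buf	.comm 16

   and the SOM linker then derives the alignment from the rounded
   size, as described in the comment before this function.  */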
8546 /* We can't use .comm for local common storage as the SOM linker effectively
8547 treats the symbol as universal and uses the same storage for local symbols
8548 with the same name in different object files. The .block directive
8549 reserves an uninitialized block of storage. However, it's not common
8550 storage. Fortunately, GCC never requests common storage with the same
8551 name in any given translation unit. */
8554 pa_asm_output_aligned_local (FILE *stream,
8556 unsigned HOST_WIDE_INT size,
8559 switch_to_section (bss_section);
8560 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8563 fprintf (stream, "%s", LOCAL_ASM_OP);
8564 assemble_name (stream, name);
8565 fprintf (stream, "\n");
8568 ASM_OUTPUT_LABEL (stream, name);
8569 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8572 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8573 use in fmpysub instructions. */
8575 fmpysuboperands (rtx *operands)
8577 enum machine_mode mode = GET_MODE (operands[0]);
8579 /* Must be a floating point mode. */
8580 if (mode != SFmode && mode != DFmode)
8583 /* All modes must be the same. */
8584 if (! (mode == GET_MODE (operands[1])
8585 && mode == GET_MODE (operands[2])
8586 && mode == GET_MODE (operands[3])
8587 && mode == GET_MODE (operands[4])
8588 && mode == GET_MODE (operands[5])))
8591 /* All operands must be registers. */
8592 if (! (GET_CODE (operands[1]) == REG
8593 && GET_CODE (operands[2]) == REG
8594 && GET_CODE (operands[3]) == REG
8595 && GET_CODE (operands[4]) == REG
8596 && GET_CODE (operands[5]) == REG))
8599 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8600 operation, so operands[4] must be the same as operands[3]. */
8601 if (! rtx_equal_p (operands[3], operands[4]))
8604 /* The multiply result cannot feed into the subtraction. */
8605 if (rtx_equal_p (operands[5], operands[0]))
8608 /* Inout operand of sub cannot conflict with any operands from multiply. */
8609 if (rtx_equal_p (operands[3], operands[0])
8610 || rtx_equal_p (operands[3], operands[1])
8611 || rtx_equal_p (operands[3], operands[2]))
8614 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8616 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8617 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8618 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8619 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8620 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8621 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8624 /* Passed. Operands are suitable for fmpysub. */
8628 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8629 constants for shadd instructions. */
8631 shadd_constant_p (int val)
8633 if (val == 2 || val == 4 || val == 8)
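/* Background note (an assumption, not part of the original sources):
   the shadd family shifts its first operand left before the add, so
   for example "sh2add %r26,%r25,%r28" computes %r28 = 4 * %r26 + %r25.
   Shifts of 1, 2 and 3 bits correspond to the scale factors 2, 4 and
   8 accepted above.  */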
8639 /* Return 1 if OP is valid as a base or index register in a REG+REG address. */
8643 borx_reg_operand (rtx op, enum machine_mode mode)
8645 if (GET_CODE (op) != REG)
8648 /* We must reject virtual registers as the only expressions that
8649 can be instantiated are REG and REG+CONST. */
8650 if (op == virtual_incoming_args_rtx
8651 || op == virtual_stack_vars_rtx
8652 || op == virtual_stack_dynamic_rtx
8653 || op == virtual_outgoing_args_rtx
8654 || op == virtual_cfa_rtx)
8657 /* While it's always safe to index off the frame pointer, it's not
8658 profitable to do so when the frame pointer is being eliminated. */
8659 if (!reload_completed
8660 && flag_omit_frame_pointer
8661 && !cfun->calls_alloca
8662 && op == frame_pointer_rtx)
8665 return register_operand (op, mode);
8668 /* Return 1 if this operand is anything other than a hard register. */
8671 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8673 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8676 /* Return TRUE if INSN branches forward. */
8679 forward_branch_p (rtx insn)
8681 rtx lab = JUMP_LABEL (insn);
8683 /* The INSN must have a jump label. */
8684 gcc_assert (lab != NULL_RTX);
8686 if (INSN_ADDRESSES_SET_P ())
8687 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8694 insn = NEXT_INSN (insn);
8700 /* Return 1 if OP is an equality comparison, else return 0. */
8702 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8704 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8707 /* Return 1 if INSN is in the delay slot of a call instruction. */
8709 jump_in_call_delay (rtx insn)
8712 if (GET_CODE (insn) != JUMP_INSN)
8715 if (PREV_INSN (insn)
8716 && PREV_INSN (PREV_INSN (insn))
8717 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8719 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8721 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8722 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8729 /* Output an unconditional move and branch insn. */
8732 output_parallel_movb (rtx *operands, rtx insn)
8734 int length = get_attr_length (insn);
8736 /* These are the cases in which we win. */
8738 return "mov%I1b,tr %1,%0,%2";
8740 /* None of the following cases win, but they don't lose either. */
8743 if (dbr_sequence_length () == 0)
8745 /* Nothing in the delay slot, fake it by putting the combined
8746 insn (the copy or add) in the delay slot of a bl. */
8747 if (GET_CODE (operands[1]) == CONST_INT)
8748 return "b %2\n\tldi %1,%0";
8750 return "b %2\n\tcopy %1,%0";
8754 /* Something in the delay slot, but we've got a long branch. */
8755 if (GET_CODE (operands[1]) == CONST_INT)
8756 return "ldi %1,%0\n\tb %2";
8758 return "copy %1,%0\n\tb %2";
8762 if (GET_CODE (operands[1]) == CONST_INT)
8763 output_asm_insn ("ldi %1,%0", operands);
8765 output_asm_insn ("copy %1,%0", operands);
8766 return output_lbranch (operands[2], insn, 1);
8769 /* Output an unconditional add and branch insn. */
8772 output_parallel_addb (rtx *operands, rtx insn)
8774 int length = get_attr_length (insn);
8776 /* To make life easy we want operand0 to be the shared input/output
8777 operand and operand1 to be the readonly operand. */
8778 if (operands[0] == operands[1])
8779 operands[1] = operands[2];
8781 /* These are the cases in which we win. */
8783 return "add%I1b,tr %1,%0,%3";
8785 /* None of the following cases win, but they don't lose either. */
8788 if (dbr_sequence_length () == 0)
8789 /* Nothing in the delay slot, fake it by putting the combined
8790 insn (the copy or add) in the delay slot of a bl. */
8791 return "b %3\n\tadd%I1 %1,%0,%0";
8793 /* Something in the delay slot, but we've got a long branch. */
8794 return "add%I1 %1,%0,%0\n\tb %3";
8797 output_asm_insn ("add%I1 %1,%0,%0", operands);
8798 return output_lbranch (operands[3], insn, 1);
8801 /* Return nonzero if INSN (a jump insn) immediately follows a call
8802 to a named function. This is used to avoid filling the delay slot
8803 of the jump since it can usually be eliminated by modifying RP in
8804 the delay slot of the call. */
8807 following_call (rtx insn)
8809 if (! TARGET_JUMP_IN_DELAY)
8812 /* Find the previous real insn, skipping NOTEs. */
8813 insn = PREV_INSN (insn);
8814 while (insn && GET_CODE (insn) == NOTE)
8815 insn = PREV_INSN (insn);
8817 /* Check for CALL_INSNs and millicode calls. */
8819 && ((GET_CODE (insn) == CALL_INSN
8820 && get_attr_type (insn) != TYPE_DYNCALL)
8821 || (GET_CODE (insn) == INSN
8822 && GET_CODE (PATTERN (insn)) != SEQUENCE
8823 && GET_CODE (PATTERN (insn)) != USE
8824 && GET_CODE (PATTERN (insn)) != CLOBBER
8825 && get_attr_type (insn) == TYPE_MILLI)))
8831 /* We use this hook to perform a PA specific optimization which is difficult
8832 to do in earlier passes.
8834 We want the delay slots of branches within jump tables to be filled.
8835 None of the compiler passes at the moment even has the notion that a
8836 PA jump table doesn't contain addresses, but instead contains actual instructions!
8839 Because we actually jump into the table, the addresses of each entry
8840 must stay constant in relation to the beginning of the table (which
8841 itself must stay constant relative to the instruction to jump into
8842 it). I don't believe we can guarantee earlier passes of the compiler
8843 will adhere to those rules.
8845 So, late in the compilation process we find all the jump tables, and
8846 expand them into real code -- e.g. each entry in the jump table vector
8847 will get an appropriate label followed by a jump to the final target.
8849 Reorg and the final jump pass can then optimize these branches and
8850 fill their delay slots. We end up with smaller, more efficient code.
8852 The jump instructions within the table are special; we must be able
8853 to identify them during assembly output (if the jumps don't get filled
8854 we need to emit a nop rather than nullifying the delay slot). We
8855 identify jumps in switch tables by using insns with the attribute
8856 type TYPE_BTABLE_BRANCH.
8858 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8859 insns. This serves two purposes: first, it prevents jump.c from
8860 noticing that the last N entries in the table jump to the instruction
8861 immediately after the table and deleting the jumps. Second, those
8862 insns mark where we should emit .begin_brtab and .end_brtab directives
8863 when using GAS (allows for better link time optimizations). */
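/* Illustrative sketch (labels are hypothetical, not from the original
   sources): an ADDR_VEC entry referring to label L5, which would
   otherwise be emitted as a table address, is expanded by the loop
   below into an explicit jump with its own label,

   L100:
	b	L5

   followed by a barrier and bracketed by begin_brtab/end_brtab
   markers, so that reorg can later try to fill its delay slot.  */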
8870 remove_useless_addtr_insns (1);
8872 if (pa_cpu < PROCESSOR_8000)
8873 pa_combine_instructions ();
8876 /* This is fairly cheap, so always run it if optimizing. */
8877 if (optimize > 0 && !TARGET_BIG_SWITCH)
8879 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8880 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8882 rtx pattern, tmp, location, label;
8883 unsigned int length, i;
8885 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8886 if (GET_CODE (insn) != JUMP_INSN
8887 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8888 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8891 /* Emit marker for the beginning of the branch table. */
8892 emit_insn_before (gen_begin_brtab (), insn);
8894 pattern = PATTERN (insn);
8895 location = PREV_INSN (insn);
8896 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8898 for (i = 0; i < length; i++)
8900 /* Emit a label before each jump to keep jump.c from
8901 removing this code. */
8902 tmp = gen_label_rtx ();
8903 LABEL_NUSES (tmp) = 1;
8904 emit_label_after (tmp, location);
8905 location = NEXT_INSN (location);
8907 if (GET_CODE (pattern) == ADDR_VEC)
8908 label = XEXP (XVECEXP (pattern, 0, i), 0);
8910 label = XEXP (XVECEXP (pattern, 1, i), 0);
8912 tmp = gen_short_jump (label);
8914 /* Emit the jump itself. */
8915 tmp = emit_jump_insn_after (tmp, location);
8916 JUMP_LABEL (tmp) = label;
8917 LABEL_NUSES (label)++;
8918 location = NEXT_INSN (location);
8920 /* Emit a BARRIER after the jump. */
8921 emit_barrier_after (location);
8922 location = NEXT_INSN (location);
8925 /* Emit marker for the end of the branch table. */
8926 emit_insn_before (gen_end_brtab (), location);
8927 location = NEXT_INSN (location);
8928 emit_barrier_after (location);
8930 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8936 /* Still need brtab marker insns. FIXME: the presence of these
8937 markers disables output of the branch table to readonly memory,
8938 and any alignment directives that might be needed. Possibly,
8939 the begin_brtab insn should be output before the label for the
8940 table. This doesn't matter at the moment since the tables are
8941 always output in the text section. */
8942 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8944 /* Find an ADDR_VEC insn. */
8945 if (GET_CODE (insn) != JUMP_INSN
8946 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8947 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8950 /* Now generate markers for the beginning and end of the branch table. */
8952 emit_insn_before (gen_begin_brtab (), insn);
8953 emit_insn_after (gen_end_brtab (), insn);
8958 /* The PA has a number of odd instructions which can perform multiple
8959 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8960 it may be profitable to combine two instructions into one instruction
8961 with two outputs. It's not profitable on PA2.0 machines because the
8962 two outputs would take two slots in the reorder buffers.
8964 This routine finds instructions which can be combined and combines
8965 them. We only support some of the potential combinations, and we
8966 only try common ways to find suitable instructions.
8968 * addb can add two registers or a register and a small integer
8969 and jump to a nearby (+-8k) location. Normally the jump to the
8970 nearby location is conditional on the result of the add, but by
8971 using the "true" condition we can make the jump unconditional.
8972 Thus addb can perform two independent operations in one insn.
8974 * movb is similar to addb in that it can perform a reg->reg
8975 or small immediate->reg copy and jump to a nearby (+-8k) location.
8977 * fmpyadd and fmpysub can perform a FP multiply and either an
8978 FP add or FP sub if the operands of the multiply and add/sub are
8979 independent (there are other minor restrictions). Note both
8980 the fmpy and fadd/fsub can in theory move to better spots according
8981 to data dependencies, but for now we require the fmpy stay at a fixed location.
8984 * Many of the memory operations can perform pre & post updates
8985 of index registers. GCC's pre/post increment/decrement addressing
8986 is far too simple to take advantage of all the possibilities. This
8987 pass may not be suitable since those insns may not be independent.
8989 * comclr can compare two ints or an int and a register, nullify
8990 the following instruction and zero some other register. This
8991 is more difficult to use as it's harder to find an insn which
8992 will generate a comclr than finding something like an unconditional
8993 branch. (conditional moves & long branches create comclr insns).
8995 * Most arithmetic operations can conditionally skip the next
8996 instruction. They can be viewed as "perform this operation
8997 and conditionally jump to this nearby location" (where nearby
8998 is an insn away). These are difficult to use due to the
8999 branch length restrictions. */
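/* Illustrative example (registers and label are hypothetical, not from
   the original sources): a separate add followed by an unconditional
   branch,

	add	%r26,%r25,%r25
	b,n	L10

   can be rewritten as the single instruction

	addb,tr	%r26,%r25,L10

   when the target is within reach; this is the kind of pairing the
   pass below searches for.  */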
9002 pa_combine_instructions (void)
9004 rtx anchor, new_rtx;
9006 /* This can get expensive since the basic algorithm is on the
9007 order of O(n^2) (or worse). Only do it for -O2 or higher
9008 levels of optimization. */
9012 /* Walk down the list of insns looking for "anchor" insns which
9013 may be combined with "floating" insns. As the name implies,
9014 "anchor" instructions don't move, while "floating" insns may
9016 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9017 new_rtx = make_insn_raw (new_rtx);
9019 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9021 enum attr_pa_combine_type anchor_attr;
9022 enum attr_pa_combine_type floater_attr;
9024 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9025 Also ignore any special USE insns. */
9026 if ((GET_CODE (anchor) != INSN
9027 && GET_CODE (anchor) != JUMP_INSN
9028 && GET_CODE (anchor) != CALL_INSN)
9029 || GET_CODE (PATTERN (anchor)) == USE
9030 || GET_CODE (PATTERN (anchor)) == CLOBBER
9031 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9032 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9035 anchor_attr = get_attr_pa_combine_type (anchor);
9036 /* See if anchor is an insn suitable for combination. */
9037 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9038 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9039 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9040 && ! forward_branch_p (anchor)))
9044 for (floater = PREV_INSN (anchor);
9046 floater = PREV_INSN (floater))
9048 if (GET_CODE (floater) == NOTE
9049 || (GET_CODE (floater) == INSN
9050 && (GET_CODE (PATTERN (floater)) == USE
9051 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9054 /* Anything except a regular INSN will stop our search. */
9055 if (GET_CODE (floater) != INSN
9056 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9057 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9063 /* See if FLOATER is suitable for combination with the anchor. */
9065 floater_attr = get_attr_pa_combine_type (floater);
9066 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9067 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9068 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9069 && floater_attr == PA_COMBINE_TYPE_FMPY))
9071 /* If ANCHOR and FLOATER can be combined, then we're
9072 done with this pass. */
9073 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9074 SET_DEST (PATTERN (floater)),
9075 XEXP (SET_SRC (PATTERN (floater)), 0),
9076 XEXP (SET_SRC (PATTERN (floater)), 1)))
9080 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9081 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9083 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9085 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9086 SET_DEST (PATTERN (floater)),
9087 XEXP (SET_SRC (PATTERN (floater)), 0),
9088 XEXP (SET_SRC (PATTERN (floater)), 1)))
9093 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9094 SET_DEST (PATTERN (floater)),
9095 SET_SRC (PATTERN (floater)),
9096 SET_SRC (PATTERN (floater))))
9102 /* If we didn't find anything on the backwards scan try forwards. */
9104 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9105 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9107 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9109 if (GET_CODE (floater) == NOTE
9110 || (GET_CODE (floater) == INSN
9111 && (GET_CODE (PATTERN (floater)) == USE
9112 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9116 /* Anything except a regular INSN will stop our search. */
9117 if (GET_CODE (floater) != INSN
9118 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9119 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9125 /* See if FLOATER is suitable for combination with the anchor. */
9127 floater_attr = get_attr_pa_combine_type (floater);
9128 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9129 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9130 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9131 && floater_attr == PA_COMBINE_TYPE_FMPY))
9133 /* If ANCHOR and FLOATER can be combined, then we're
9134 done with this pass. */
9135 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9136 SET_DEST (PATTERN (floater)),
9137 XEXP (SET_SRC (PATTERN (floater)),
9139 XEXP (SET_SRC (PATTERN (floater)),
9146 /* FLOATER will be nonzero if we found a suitable floating
9147 insn for combination with ANCHOR. */
9149 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9150 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9152 /* Emit the new instruction and delete the old anchor. */
9153 emit_insn_before (gen_rtx_PARALLEL
9155 gen_rtvec (2, PATTERN (anchor),
9156 PATTERN (floater))),
9159 SET_INSN_DELETED (anchor);
9161 /* Emit a special USE insn for FLOATER, then delete
9162 the floating insn. */
9163 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9164 delete_insn (floater);
9169 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9172 /* Emit the new_jump instruction and delete the old anchor. */
9174 = emit_jump_insn_before (gen_rtx_PARALLEL
9176 gen_rtvec (2, PATTERN (anchor),
9177 PATTERN (floater))),
9180 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9181 SET_INSN_DELETED (anchor);
9183 /* Emit a special USE insn for FLOATER, then delete
9184 the floating insn. */
9185 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9186 delete_insn (floater);
9194 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9197 int insn_code_number;
9200 /* Create a PARALLEL with the patterns of ANCHOR and
9201 FLOATER, try to recognize it, then test constraints
9202 for the resulting pattern.
9204 If the pattern doesn't match or the constraints
9205 aren't met, keep searching for a suitable floater insn. */
9207 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9208 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9209 INSN_CODE (new_rtx) = -1;
9210 insn_code_number = recog_memoized (new_rtx);
9211 if (insn_code_number < 0
9212 || (extract_insn (new_rtx), ! constrain_operands (1)))
9226 /* There are up to three operands to consider: one
9227 output and two inputs.
9229 The output must not be used between FLOATER & ANCHOR
9230 exclusive. The inputs must not be set between
9231 FLOATER and ANCHOR exclusive. */
9233 if (reg_used_between_p (dest, start, end))
9236 if (reg_set_between_p (src1, start, end))
9239 if (reg_set_between_p (src2, start, end))
9242 /* If we get here, then everything is good. */
9246 /* Return nonzero if references for INSN are delayed.
9248 Millicode insns are actually function calls with some special
9249 constraints on arguments and register usage.
9251 Millicode calls always expect their arguments in the integer argument
9252 registers, and always return their result in %r29 (ret1). They
9253 are expected to clobber their arguments, %r1, %r29, and the return
9254 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9256 This function tells reorg that the references to arguments and
9257 millicode calls do not appear to happen until after the millicode call.
9258 This allows reorg to put insns which set the argument registers into the
9259 delay slot of the millicode call -- thus they act more like traditional CALL_INSNs.
9262 Note we cannot consider side effects of the insn to be delayed because
9263 the branch and link insn will clobber the return pointer. If we happened
9264 to use the return pointer in the delay slot of the call, then we lose.
9266 get_attr_type will try to recognize the given insn, so make sure to
9267 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns in this case. */
9270 insn_refs_are_delayed (rtx insn)
9272 return ((GET_CODE (insn) == INSN
9273 && GET_CODE (PATTERN (insn)) != SEQUENCE
9274 && GET_CODE (PATTERN (insn)) != USE
9275 && GET_CODE (PATTERN (insn)) != CLOBBER
9276 && get_attr_type (insn) == TYPE_MILLI));
9279 /* Promote the return value, but not the arguments. */
9281 static enum machine_mode
9282 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9283 enum machine_mode mode,
9284 int *punsignedp ATTRIBUTE_UNUSED,
9285 const_tree fntype ATTRIBUTE_UNUSED,
9288 if (for_return == 0)
9290 return promote_mode (type, mode, punsignedp);
9293 /* On the HP-PA the value is found in register(s) 28(-29), unless
9294 the mode is SF or DF. Then the value is returned in fr4 (32).
9296 This must perform the same promotions as PROMOTE_MODE, else promoting
9297 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9299 Small structures must be returned in a PARALLEL on PA64 in order
9300 to match the HP Compiler ABI. */
9303 pa_function_value (const_tree valtype,
9304 const_tree func ATTRIBUTE_UNUSED,
9305 bool outgoing ATTRIBUTE_UNUSED)
9307 enum machine_mode valmode;
9309 if (AGGREGATE_TYPE_P (valtype)
9310 || TREE_CODE (valtype) == COMPLEX_TYPE
9311 || TREE_CODE (valtype) == VECTOR_TYPE)
9315 /* Aggregates with a size less than or equal to 128 bits are
9316 returned in GR 28(-29). They are left justified. The pad
9317 bits are undefined. Larger aggregates are returned in memory. */
9321 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9323 for (i = 0; i < ub; i++)
9325 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9326 gen_rtx_REG (DImode, 28 + i),
9331 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9333 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9335 /* Aggregates 5 to 8 bytes in size are returned in general
9336 registers r28-r29 in the same manner as other non
9337 floating-point objects. The data is right-justified and
9338 zero-extended to 64 bits. This is opposite to the normal
9339 justification used on big endian targets and requires
9340 special treatment. */
9341 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9342 gen_rtx_REG (DImode, 28), const0_rtx);
9343 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9347 if ((INTEGRAL_TYPE_P (valtype)
9348 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9349 || POINTER_TYPE_P (valtype))
9350 valmode = word_mode;
9352 valmode = TYPE_MODE (valtype);
9354 if (TREE_CODE (valtype) == REAL_TYPE
9355 && !AGGREGATE_TYPE_P (valtype)
9356 && TYPE_MODE (valtype) != TFmode
9357 && !TARGET_SOFT_FLOAT)
9358 return gen_rtx_REG (valmode, 32);
9360 return gen_rtx_REG (valmode, 28);
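/* For illustration (not from the original sources): with the code
   above, an "int" result is returned in %r28 after promotion to
   word_mode, while a "double" result is returned in floating-point
   register 32 (%fr4) unless the target uses soft float.  */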
9363 /* Implement the TARGET_LIBCALL_VALUE hook. */
9366 pa_libcall_value (enum machine_mode mode,
9367 const_rtx fun ATTRIBUTE_UNUSED)
9369 if (! TARGET_SOFT_FLOAT
9370 && (mode == SFmode || mode == DFmode))
9371 return gen_rtx_REG (mode, 32);
9373 return gen_rtx_REG (mode, 28);
9376 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9379 pa_function_value_regno_p (const unsigned int regno)
9382 || (! TARGET_SOFT_FLOAT && regno == 32))
9388 /* Update the data in CUM to advance over an argument
9389 of mode MODE and data type TYPE.
9390 (TYPE is null for libcalls where that information may not be available.) */
9393 pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9394 const_tree type, bool named ATTRIBUTE_UNUSED)
9396 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9398 cum->nargs_prototype--;
9399 cum->words += (arg_size
9400 + ((cum->words & 01)
9401 && type != NULL_TREE
9405 /* Return the location of a parameter that is passed in a register or NULL
9406 if the parameter has any component that is passed in memory.
9408 This is new code and will be pushed into the net sources after further testing.
9411 ??? We might want to restructure this so that it looks more like other
9414 pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9415 const_tree type, bool named ATTRIBUTE_UNUSED)
9417 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9424 if (mode == VOIDmode)
9427 arg_size = FUNCTION_ARG_SIZE (mode, type);
9429 /* If this arg would be passed partially or totally on the stack, then
9430 this routine should return zero. pa_arg_partial_bytes will
9431 handle arguments which are split between regs and stack slots if
9432 the ABI mandates split arguments. */
9435 /* The 32-bit ABI does not split arguments. */
9436 if (cum->words + arg_size > max_arg_words)
9442 alignment = cum->words & 1;
9443 if (cum->words + alignment >= max_arg_words)
9447 /* The 32bit ABIs and the 64bit ABIs are rather different,
9448 particularly in their handling of FP registers. We might
9449 be able to cleverly share code between them, but I'm not
9450 going to bother in the hope that splitting them up results
9451 in code that is more easily understood. */
9455 /* Advance the base registers to their current locations.
9457 Remember, gprs grow towards smaller register numbers while
9458 fprs grow to higher register numbers. Also remember that
9459 although FP regs are 32-bit addressable, we pretend that
9460 the registers are 64-bits wide. */
9461 gpr_reg_base = 26 - cum->words;
9462 fpr_reg_base = 32 + cum->words;
9464 /* Arguments wider than one word and small aggregates need special treatment. */
9468 || (type && (AGGREGATE_TYPE_P (type)
9469 || TREE_CODE (type) == COMPLEX_TYPE
9470 || TREE_CODE (type) == VECTOR_TYPE)))
9472 /* Double-extended precision (80-bit), quad-precision (128-bit)
9473 and aggregates including complex numbers are aligned on
9474 128-bit boundaries. The first eight 64-bit argument slots
9475 are associated one-to-one, with general registers r26
9476 through r19, and also with floating-point registers fr4
9477 through fr11. Arguments larger than one word are always
9478 passed in general registers.
9480 Using a PARALLEL with a word mode register results in left
9481 justified data on a big-endian target. */
9484 int i, offset = 0, ub = arg_size;
9486 /* Align the base register. */
9487 gpr_reg_base -= alignment;
9489 ub = MIN (ub, max_arg_words - cum->words - alignment);
9490 for (i = 0; i < ub; i++)
9492 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9493 gen_rtx_REG (DImode, gpr_reg_base),
9499 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9504 /* If the argument is larger than a word, then we know precisely
9505 which registers we must use. */
9519 /* Structures 5 to 8 bytes in size are passed in the general
9520 registers in the same manner as other non floating-point
9521 objects. The data is right-justified and zero-extended
9522 to 64 bits. This is opposite to the normal justification
9523 used on big endian targets and requires special treatment.
9524 We now define BLOCK_REG_PADDING to pad these objects.
9525 Aggregates, complex and vector types are passed in the same
9526 manner as structures. */
9528 || (type && (AGGREGATE_TYPE_P (type)
9529 || TREE_CODE (type) == COMPLEX_TYPE
9530 || TREE_CODE (type) == VECTOR_TYPE)))
9532 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9533 gen_rtx_REG (DImode, gpr_reg_base),
9535 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9540 /* We have a single word (32 bits). A simple computation
9541 will get us the register #s we need. */
9542 gpr_reg_base = 26 - cum->words;
9543 fpr_reg_base = 32 + 2 * cum->words;
9547 /* Determine if the argument needs to be passed in both general and
9548 floating point registers. */
9549 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9550 /* If we are doing soft-float with portable runtime, then there
9551 is no need to worry about FP regs. */
9552 && !TARGET_SOFT_FLOAT
9553 /* The parameter must be some kind of scalar float, else we just
9554 pass it in integer registers. */
9555 && GET_MODE_CLASS (mode) == MODE_FLOAT
9556 /* The target function must not have a prototype. */
9557 && cum->nargs_prototype <= 0
9558 /* libcalls do not need to pass items in both FP and general
9560 && type != NULL_TREE
9561 /* All this hair applies to "outgoing" args only. This includes
9562 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9564 /* Also pass outgoing floating arguments in both registers in indirect
9565 calls with the 32-bit ABI and the HP assembler since there is no
9566 way to specify the argument locations in static functions. */
9571 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9577 gen_rtx_EXPR_LIST (VOIDmode,
9578 gen_rtx_REG (mode, fpr_reg_base),
9580 gen_rtx_EXPR_LIST (VOIDmode,
9581 gen_rtx_REG (mode, gpr_reg_base),
9586 /* See if we should pass this parameter in a general register. */
9587 if (TARGET_SOFT_FLOAT
9588 /* Indirect calls in the normal 32bit ABI require all arguments
9589 to be passed in general registers. */
9590 || (!TARGET_PORTABLE_RUNTIME
9594 /* If the parameter is not a scalar floating-point parameter,
9595 then it belongs in GPRs. */
9596 || GET_MODE_CLASS (mode) != MODE_FLOAT
9597 /* Structure with single SFmode field belongs in GPR. */
9598 || (type && AGGREGATE_TYPE_P (type)))
9599 retval = gen_rtx_REG (mode, gpr_reg_base);
9601 retval = gen_rtx_REG (mode, fpr_reg_base);
9606 /* Arguments larger than one word are double word aligned. */
9609 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9611 tree size = TYPE_SIZE (type);
9612 bool singleword = (type
9613 ? (integer_zerop (size)
9614 || !TREE_CONSTANT (size)
9615 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9616 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9618 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
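/* Worked example (illustrative, not from the original sources): on the
   32-bit port a "long long" argument occupies two words, so the
   function above returns MAX_PARM_BOUNDARY (double-word alignment),
   while a plain "int" fits in one word and gets PARM_BOUNDARY.  */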
9621 /* If this arg would be passed totally in registers or totally on the stack,
9622 then this routine should return zero. */
9625 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9626 tree type, bool named ATTRIBUTE_UNUSED)
9628 unsigned int max_arg_words = 8;
9629 unsigned int offset = 0;
9634 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9637 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9638 /* Arg fits fully into registers. */
9640 else if (cum->words + offset >= max_arg_words)
9641 /* Arg fully on the stack. */
9645 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
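/* Worked example (illustrative, not from the original sources): with
   cum->words == 6, a four-word argument no longer fits in the eight
   argument words, so the code above returns (8 - 6) * UNITS_PER_WORD
   bytes -- two words are passed in registers and the remainder goes
   on the stack.  */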
9649 /* A get_unnamed_section callback for switching to the text section.
9651 This function is only used with SOM. Because we don't support
9652 named subspaces, we can only create a new subspace or switch back
9653 to the default text subspace. */
9656 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9658 gcc_assert (TARGET_SOM);
9661 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9663 /* We only want to emit a .nsubspa directive once at the
9664 start of the function. */
9665 cfun->machine->in_nsubspa = 1;
9667 /* Create a new subspace for the text. This provides
9668 better stub placement and one-only functions. */
9670 && DECL_ONE_ONLY (cfun->decl)
9671 && !DECL_WEAK (cfun->decl))
9673 output_section_asm_op ("\t.SPACE $TEXT$\n"
9674 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9675 "ACCESS=44,SORT=24,COMDAT");
9681 /* There isn't a current function or the body of the current
9682 function has been completed. So, we are changing to the
9683 text section to output debugging information. Thus, we
9684 need to forget that we are in the text section so that
9685 varasm.c will call us when text_section is selected again. */
9686 gcc_assert (!cfun || !cfun->machine
9687 || cfun->machine->in_nsubspa == 2);
9690 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9693 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9696 /* A get_unnamed_section callback for switching to comdat data
9697 sections. This function is only used with SOM. */
9700 som_output_comdat_data_section_asm_op (const void *data)
9703 output_section_asm_op (data);
9706 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9709 pa_som_asm_init_sections (void)
9712 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9714 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9715 is not being generated. */
9716 som_readonly_data_section
9717 = get_unnamed_section (0, output_section_asm_op,
9718 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9720 /* When secondary definitions are not supported, SOM makes readonly
9721 data one-only by creating a new $LIT$ subspace in $TEXT$ with the comdat flag. */
9723 som_one_only_readonly_data_section
9724 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9726 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9727 "ACCESS=0x2c,SORT=16,COMDAT");
9730 /* When secondary definitions are not supported, SOM makes data one-only
9731 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9732 som_one_only_data_section
9733 = get_unnamed_section (SECTION_WRITE,
9734 som_output_comdat_data_section_asm_op,
9735 "\t.SPACE $PRIVATE$\n"
9736 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9737 "ACCESS=31,SORT=24,COMDAT");
9739 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9740 which reference data within the $TEXT$ space (for example constant
9741 strings in the $LIT$ subspace).
9743 The assemblers (GAS and HP as) both have problems with handling
9744 the difference of two symbols which is the other correct way to
9745 reference constant data during PIC code generation.
9747 So, there's no way to reference constant data which is in the
9748 $TEXT$ space during PIC generation. Instead place all constant
9749 data into the $PRIVATE$ subspace (this reduces sharing, but it
9750 works correctly). */
9751 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9753 /* We must not have a reference to an external symbol defined in a
9754 shared library in a readonly section, else the SOM linker will hang.
9757 So, we force exception information into the data section. */
9758 exception_section = data_section;
9761 /* On hpux10, the linker will give an error if we have a reference
9762 in the read-only data section to a symbol defined in a shared
9763 library. Therefore, expressions that might require a reloc can
9764 not be placed in the read-only data section. */
9767 pa_select_section (tree exp, int reloc,
9768 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9770 if (TREE_CODE (exp) == VAR_DECL
9771 && TREE_READONLY (exp)
9772 && !TREE_THIS_VOLATILE (exp)
9773 && DECL_INITIAL (exp)
9774 && (DECL_INITIAL (exp) == error_mark_node
9775 || TREE_CONSTANT (DECL_INITIAL (exp)))
9779 && DECL_ONE_ONLY (exp)
9780 && !DECL_WEAK (exp))
9781 return som_one_only_readonly_data_section;
9783 return readonly_data_section;
9785 else if (CONSTANT_CLASS_P (exp) && !reloc)
9786 return readonly_data_section;
9788 && TREE_CODE (exp) == VAR_DECL
9789 && DECL_ONE_ONLY (exp)
9790 && !DECL_WEAK (exp))
9791 return som_one_only_data_section;
9793 return data_section;
9797 pa_globalize_label (FILE *stream, const char *name)
9799 /* We only handle DATA objects here, functions are globalized in
9800 ASM_DECLARE_FUNCTION_NAME. */
9801 if (! FUNCTION_NAME_P (name))
9803 fputs ("\t.EXPORT ", stream);
9804 assemble_name (stream, name);
9805 fputs (",DATA\n", stream);
9809 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9812 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9813 int incoming ATTRIBUTE_UNUSED)
9815 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9818 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9821 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9823 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9824 PA64 ABI says that objects larger than 128 bits are returned in memory.
9825 Note, int_size_in_bytes can return -1 if the size of the object is
9826 variable or larger than the maximum value that can be expressed as
9827 a HOST_WIDE_INT. It can also return zero for an empty type. The
9828 simplest way to handle variable and empty types is to pass them in
9829 memory. This avoids problems in defining the boundaries of argument
9830 slots, allocating registers, etc. */
9831 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9832 || int_size_in_bytes (type) <= 0);
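/* For illustration (not from the original sources): a 12-byte struct
   is returned in memory on the 32-bit SOM port (12 > 8) but in
   registers on the 64-bit port (12 <= 16), per the test above.  */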
9835 /* Structure to hold declaration and name of external symbols that are
9836 emitted by GCC. We generate a vector of these symbols and output them
9837 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9838 This avoids putting out names that are never really used. */
9840 typedef struct GTY(()) extern_symbol
9846 /* Define gc'd vector type for extern_symbol. */
9847 DEF_VEC_O(extern_symbol);
9848 DEF_VEC_ALLOC_O(extern_symbol,gc);
9850 /* Vector of extern_symbol pointers. */
9851 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9853 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9854 /* Mark DECL (name NAME) as an external reference (assembler output
9855 file FILE). This saves the names to output at the end of the file
9856 if actually referenced. */
9859 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9861 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9863 gcc_assert (file == asm_out_file);
9868 /* Output text required at the end of an assembler file.
9869 This includes deferred plabels and .import directives for
9870 all external symbols that were actually referenced. */
9873 pa_hpux_file_end (void)
9878 if (!NO_DEFERRED_PROFILE_COUNTERS)
9879 output_deferred_profile_counters ();
9881 output_deferred_plabels ();
9883 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9885 tree decl = p->decl;
9887 if (!TREE_ASM_WRITTEN (decl)
9888 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9889 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9892 VEC_free (extern_symbol, gc, extern_symbols);
9896 /* Return true if a change from mode FROM to mode TO for a register
9897 in register class RCLASS is invalid. */
9900 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9901 enum reg_class rclass)
9906 /* Reject changes to/from complex and vector modes. */
9907 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9908 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9911 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9914 /* There is no way to load QImode or HImode values directly from
9915 memory. SImode loads to the FP registers are not zero extended.
9916 On the 64-bit target, this conflicts with the definition of
9917 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9918 with different sizes in the floating-point registers. */
9919 if (MAYBE_FP_REG_CLASS_P (rclass))
9922 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9923 in specific sets of registers. Thus, we cannot allow changing
9924 to a larger mode when it's larger than a word. */
9925 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9926 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9932 /* Returns TRUE if it is a good idea to tie two pseudo registers
9933 when one has mode MODE1 and one has mode MODE2.
9934 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9935 for any hard reg, then this must be FALSE for correct output.
9937 We should return FALSE for QImode and HImode because these modes
9938 are not ok in the floating-point registers. However, this prevents
9939 tying these modes to SImode and DImode in the general registers.
9940 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9941 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9942 in the floating-point registers. */
9945 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9947 /* Don't tie modes in different classes. */
9948 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9955 /* Length in units of the trampoline instruction code. */
9957 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9960 /* Output assembler code for a block containing the constant parts
9961 of a trampoline, leaving space for the variable parts.
9963 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9964 and then branches to the specified routine.
9966 This code template is copied from text segment to stack location
9967 and then patched with pa_trampoline_init to contain valid values,
9968 and then entered as a subroutine.
9970 It is best to keep this as small as possible to avoid having to
9971 flush multiple lines in the cache. */
9974 pa_asm_trampoline_template (FILE *f)
9978 fputs ("\tldw 36(%r22),%r21\n", f);
9979 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9980 if (ASSEMBLER_DIALECT == 0)
9981 fputs ("\tdepi 0,31,2,%r21\n", f);
9983 fputs ("\tdepwi 0,31,2,%r21\n", f);
9984 fputs ("\tldw 4(%r21),%r19\n", f);
9985 fputs ("\tldw 0(%r21),%r21\n", f);
9988 fputs ("\tbve (%r21)\n", f);
9989 fputs ("\tldw 40(%r22),%r29\n", f);
9990 fputs ("\t.word 0\n", f);
9991 fputs ("\t.word 0\n", f);
9995 fputs ("\tldsid (%r21),%r1\n", f);
9996 fputs ("\tmtsp %r1,%sr0\n", f);
9997 fputs ("\tbe 0(%sr0,%r21)\n", f);
9998 fputs ("\tldw 40(%r22),%r29\n", f);
10000 fputs ("\t.word 0\n", f);
10001 fputs ("\t.word 0\n", f);
10002 fputs ("\t.word 0\n", f);
10003 fputs ("\t.word 0\n", f);
10007 fputs ("\t.dword 0\n", f);
10008 fputs ("\t.dword 0\n", f);
10009 fputs ("\t.dword 0\n", f);
10010 fputs ("\t.dword 0\n", f);
10011 fputs ("\tmfia %r31\n", f);
10012 fputs ("\tldd 24(%r31),%r1\n", f);
10013 fputs ("\tldd 24(%r1),%r27\n", f);
10014 fputs ("\tldd 16(%r1),%r1\n", f);
10015 fputs ("\tbve (%r1)\n", f);
10016 fputs ("\tldd 32(%r31),%r31\n", f);
10017 fputs ("\t.dword 0 ; fptr\n", f);
10018 fputs ("\t.dword 0 ; static link\n", f);
10022 /* Emit RTL insns to initialize the variable parts of a trampoline.
10023 FNADDR is an RTX for the address of the function's pure code.
10024 CXT is an RTX for the static chain value for the function.
10026 Move the function address to the trampoline template at offset 36.
10027 Move the static chain value to trampoline template at offset 40.
10028 Move the trampoline address to trampoline template at offset 44.
10029 Move r19 to trampoline template at offset 48. The latter two
10030 words create a plabel for the indirect call to the trampoline.
10032 A similar sequence is used for the 64-bit port but the plabel is
10033 at the beginning of the trampoline.
10035 Finally, the cache entries for the trampoline code are flushed.
10036 This is necessary to ensure that the trampoline instruction sequence
10037 is written to memory prior to any attempts at prefetching the code sequence. */
10041 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10043 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10044 rtx start_addr = gen_reg_rtx (Pmode);
10045 rtx end_addr = gen_reg_rtx (Pmode);
10046 rtx line_length = gen_reg_rtx (Pmode);
10049 emit_block_move (m_tramp, assemble_trampoline_template (),
10050 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10051 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10055 tmp = adjust_address (m_tramp, Pmode, 36);
10056 emit_move_insn (tmp, fnaddr);
10057 tmp = adjust_address (m_tramp, Pmode, 40);
10058 emit_move_insn (tmp, chain_value);
10060 /* Create a fat pointer for the trampoline. */
10061 tmp = adjust_address (m_tramp, Pmode, 44);
10062 emit_move_insn (tmp, r_tramp);
10063 tmp = adjust_address (m_tramp, Pmode, 48);
10064 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10066 /* fdc and fic only use registers for the address to flush,
10067 they do not accept integer displacements. We align the
10068 start and end addresses to the beginning of their respective
10069 cache lines to minimize the number of lines flushed. */
10070 emit_insn (gen_andsi3 (start_addr, r_tramp,
10071 GEN_INT (-MIN_CACHELINE_SIZE)));
10072 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10073 emit_insn (gen_andsi3 (end_addr, tmp,
10074 GEN_INT (-MIN_CACHELINE_SIZE)));
10075 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10076 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10077 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10078 gen_reg_rtx (Pmode),
10079 gen_reg_rtx (Pmode)));
10083 tmp = adjust_address (m_tramp, Pmode, 56);
10084 emit_move_insn (tmp, fnaddr);
10085 tmp = adjust_address (m_tramp, Pmode, 64);
10086 emit_move_insn (tmp, chain_value);
10088 /* Create a fat pointer for the trampoline. */
10089 tmp = adjust_address (m_tramp, Pmode, 16);
10090 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10091 tmp = adjust_address (m_tramp, Pmode, 24);
10092 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10094 /* fdc and fic only use registers for the address to flush,
10095 they do not accept integer displacements. We align the
10096 start and end addresses to the beginning of their respective
10097 cache lines to minimize the number of lines flushed. */
10098 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10099 emit_insn (gen_anddi3 (start_addr, tmp,
10100 GEN_INT (-MIN_CACHELINE_SIZE)));
10101 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10102 emit_insn (gen_anddi3 (end_addr, tmp,
10103 GEN_INT (-MIN_CACHELINE_SIZE)));
10104 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10105 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10106 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10107 gen_reg_rtx (Pmode),
10108 gen_reg_rtx (Pmode)));
10112 /* Perform any machine-specific adjustment in the address of the trampoline.
10113 ADDR contains the address that was passed to pa_trampoline_init.
10114 Adjust the trampoline address to point to the plabel at offset 44. */
10117 pa_trampoline_adjust_address (rtx addr)
10120 addr = memory_address (Pmode, plus_constant (addr, 46));
10125 pa_delegitimize_address (rtx orig_x)
10127 rtx x = delegitimize_mem_from_attrs (orig_x);
10129 if (GET_CODE (x) == LO_SUM
10130 && GET_CODE (XEXP (x, 1)) == UNSPEC
10131 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10132 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10137 pa_internal_arg_pointer (void)
10139 /* The argument pointer and the hard frame pointer are the same in
10140 the 32-bit runtime, so we don't need a copy. */
10142 return copy_to_reg (virtual_incoming_args_rtx);
10144 return virtual_incoming_args_rtx;
10147 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10148 Frame pointer elimination is automatically handled. */
10151 pa_can_eliminate (const int from, const int to)
10153 /* The argument cannot be eliminated in the 64-bit runtime. */
10154 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10157 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10158 ? ! frame_pointer_needed
10162 /* Define the offset between two registers, FROM to be eliminated and its
10163 replacement TO, at the start of a routine. */
10165 pa_initial_elimination_offset (int from, int to)
10167 HOST_WIDE_INT offset;
10169 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10170 && to == STACK_POINTER_REGNUM)
10171 offset = -compute_frame_size (get_frame_size (), 0);
10172 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10175 gcc_unreachable ();