1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-attr.h"
40 #include "integrate.h"
42 #include "diagnostic-core.h"
48 #include "target-def.h"
49 #include "langhooks.h"
53 /* Return nonzero if there is a bypass for the output of
54 OUT_INSN and the fp store IN_INSN. */
56 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
58 enum machine_mode store_mode;
59 enum machine_mode other_mode;
62 if (recog_memoized (in_insn) < 0
63 || (get_attr_type (in_insn) != TYPE_FPSTORE
64 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
65 || recog_memoized (out_insn) < 0)
68 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
70 set = single_set (out_insn);
74 other_mode = GET_MODE (SET_SRC (set));
76 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
80 #ifndef DO_FRAME_NOTES
81 #ifdef INCOMING_RETURN_ADDR_RTX
82 #define DO_FRAME_NOTES 1
84 #define DO_FRAME_NOTES 0
88 static void pa_option_override (void);
89 static void copy_reg_pointer (rtx, rtx);
90 static void fix_range (const char *);
91 static bool pa_handle_option (struct gcc_options *, struct gcc_options *,
92 const struct cl_decoded_option *, location_t);
93 static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
95 static int hppa_address_cost (rtx, bool);
96 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
97 static inline rtx force_mode (enum machine_mode, rtx);
98 static void pa_reorg (void);
99 static void pa_combine_instructions (void);
100 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
101 static bool forward_branch_p (rtx);
102 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
103 static int compute_movmem_length (rtx);
104 static int compute_clrmem_length (rtx);
105 static bool pa_assemble_integer (rtx, unsigned int, int);
106 static void remove_useless_addtr_insns (int);
107 static void store_reg (int, HOST_WIDE_INT, int);
108 static void store_reg_modify (int, int, HOST_WIDE_INT);
109 static void load_reg (int, HOST_WIDE_INT, int);
110 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
111 static rtx pa_function_value (const_tree, const_tree, bool);
112 static rtx pa_libcall_value (enum machine_mode, const_rtx);
113 static bool pa_function_value_regno_p (const unsigned int);
114 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
115 static void update_total_code_bytes (unsigned int);
116 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
117 static int pa_adjust_cost (rtx, rtx, rtx, int);
118 static int pa_adjust_priority (rtx, int);
119 static int pa_issue_rate (void);
120 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
121 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
123 static void pa_encode_section_info (tree, rtx, int);
124 static const char *pa_strip_name_encoding (const char *);
125 static bool pa_function_ok_for_sibcall (tree, tree);
126 static void pa_globalize_label (FILE *, const char *)
128 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
129 HOST_WIDE_INT, tree);
130 #if !defined(USE_COLLECT2)
131 static void pa_asm_out_constructor (rtx, int);
132 static void pa_asm_out_destructor (rtx, int);
134 static void pa_init_builtins (void);
135 static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
136 static rtx hppa_builtin_saveregs (void);
137 static void hppa_va_start (tree, rtx);
138 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
139 static bool pa_scalar_mode_supported_p (enum machine_mode);
140 static bool pa_commutative_p (const_rtx x, int outer_code);
141 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
142 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
143 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
144 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
145 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
146 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
147 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
148 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
149 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
150 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
151 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
152 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
153 static void output_deferred_plabels (void);
154 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
155 #ifdef ASM_OUTPUT_EXTERNAL_REAL
156 static void pa_hpux_file_end (void);
158 #if HPUX_LONG_DOUBLE_LIBRARY
159 static void pa_hpux_init_libfuncs (void);
161 static rtx pa_struct_value_rtx (tree, int);
162 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
164 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
168 static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
170 static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
171 static struct machine_function * pa_init_machine_status (void);
172 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
174 secondary_reload_info *);
175 static void pa_extra_live_on_entry (bitmap);
176 static enum machine_mode pa_promote_function_mode (const_tree,
177 enum machine_mode, int *,
180 static void pa_asm_trampoline_template (FILE *);
181 static void pa_trampoline_init (rtx, tree, rtx);
182 static rtx pa_trampoline_adjust_address (rtx);
183 static rtx pa_delegitimize_address (rtx);
184 static bool pa_print_operand_punct_valid_p (unsigned char);
185 static rtx pa_internal_arg_pointer (void);
186 static bool pa_can_eliminate (const int, const int);
187 static void pa_conditional_register_usage (void);
188 static enum machine_mode pa_c_mode_for_suffix (char);
189 static section *pa_function_section (tree, enum node_frequency, bool, bool);
190 static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
192 /* The following extra sections are only used for SOM. */
193 static GTY(()) section *som_readonly_data_section;
194 static GTY(()) section *som_one_only_readonly_data_section;
195 static GTY(()) section *som_one_only_data_section;
197 /* Counts for the number of callee-saved general and floating point
198 registers which were saved by the current function's prologue. */
199 static int gr_saved, fr_saved;
201 /* Boolean indicating whether the return pointer was saved by the
202 current function's prologue. */
203 static bool rp_saved;
205 static rtx find_addr_reg (rtx);
207 /* Keep track of the number of bytes we have output in the CODE subspace
208 during this compilation so we'll know when to emit inline long-calls. */
209 unsigned long total_code_bytes;
211 /* The last address of the previous function plus the number of bytes in
212 associated thunks that have been output. This is used to determine if
213 a thunk can use an IA-relative branch to reach its target function. */
214 static unsigned int last_address;
216 /* Variables to handle plabels that we discover are necessary at assembly
217 output time. They are output after the current function. */
218 struct GTY(()) deferred_plabel
223 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
225 static size_t n_deferred_plabels = 0;
227 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
228 static const struct default_options pa_option_optimization_table[] =
230 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
231 { OPT_LEVELS_NONE, 0, NULL, 0 }
235 /* Initialize the GCC target structure. */
237 #undef TARGET_OPTION_OVERRIDE
238 #define TARGET_OPTION_OVERRIDE pa_option_override
239 #undef TARGET_OPTION_OPTIMIZATION_TABLE
240 #define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table
242 #undef TARGET_ASM_ALIGNED_HI_OP
243 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
244 #undef TARGET_ASM_ALIGNED_SI_OP
245 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
246 #undef TARGET_ASM_ALIGNED_DI_OP
247 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
248 #undef TARGET_ASM_UNALIGNED_HI_OP
249 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
250 #undef TARGET_ASM_UNALIGNED_SI_OP
251 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
252 #undef TARGET_ASM_UNALIGNED_DI_OP
253 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
254 #undef TARGET_ASM_INTEGER
255 #define TARGET_ASM_INTEGER pa_assemble_integer
257 #undef TARGET_ASM_FUNCTION_PROLOGUE
258 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
259 #undef TARGET_ASM_FUNCTION_EPILOGUE
260 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
262 #undef TARGET_FUNCTION_VALUE
263 #define TARGET_FUNCTION_VALUE pa_function_value
264 #undef TARGET_LIBCALL_VALUE
265 #define TARGET_LIBCALL_VALUE pa_libcall_value
266 #undef TARGET_FUNCTION_VALUE_REGNO_P
267 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
269 #undef TARGET_LEGITIMIZE_ADDRESS
270 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
272 #undef TARGET_SCHED_ADJUST_COST
273 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
274 #undef TARGET_SCHED_ADJUST_PRIORITY
275 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
276 #undef TARGET_SCHED_ISSUE_RATE
277 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
279 #undef TARGET_ENCODE_SECTION_INFO
280 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
281 #undef TARGET_STRIP_NAME_ENCODING
282 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
284 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
285 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
287 #undef TARGET_COMMUTATIVE_P
288 #define TARGET_COMMUTATIVE_P pa_commutative_p
290 #undef TARGET_ASM_OUTPUT_MI_THUNK
291 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
292 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
293 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
295 #undef TARGET_ASM_FILE_END
296 #ifdef ASM_OUTPUT_EXTERNAL_REAL
297 #define TARGET_ASM_FILE_END pa_hpux_file_end
299 #define TARGET_ASM_FILE_END output_deferred_plabels
302 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
303 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
305 #if !defined(USE_COLLECT2)
306 #undef TARGET_ASM_CONSTRUCTOR
307 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
308 #undef TARGET_ASM_DESTRUCTOR
309 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
312 #undef TARGET_DEFAULT_TARGET_FLAGS
313 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
314 #undef TARGET_HANDLE_OPTION
315 #define TARGET_HANDLE_OPTION pa_handle_option
317 #undef TARGET_INIT_BUILTINS
318 #define TARGET_INIT_BUILTINS pa_init_builtins
320 #undef TARGET_EXPAND_BUILTIN
321 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
323 #undef TARGET_REGISTER_MOVE_COST
324 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
325 #undef TARGET_RTX_COSTS
326 #define TARGET_RTX_COSTS hppa_rtx_costs
327 #undef TARGET_ADDRESS_COST
328 #define TARGET_ADDRESS_COST hppa_address_cost
330 #undef TARGET_MACHINE_DEPENDENT_REORG
331 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
333 #if HPUX_LONG_DOUBLE_LIBRARY
334 #undef TARGET_INIT_LIBFUNCS
335 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
338 #undef TARGET_PROMOTE_FUNCTION_MODE
339 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
340 #undef TARGET_PROMOTE_PROTOTYPES
341 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
343 #undef TARGET_STRUCT_VALUE_RTX
344 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
345 #undef TARGET_RETURN_IN_MEMORY
346 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
347 #undef TARGET_MUST_PASS_IN_STACK
348 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
349 #undef TARGET_PASS_BY_REFERENCE
350 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
351 #undef TARGET_CALLEE_COPIES
352 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
353 #undef TARGET_ARG_PARTIAL_BYTES
354 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
355 #undef TARGET_FUNCTION_ARG
356 #define TARGET_FUNCTION_ARG pa_function_arg
357 #undef TARGET_FUNCTION_ARG_ADVANCE
358 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
359 #undef TARGET_FUNCTION_ARG_BOUNDARY
360 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
362 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
363 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
364 #undef TARGET_EXPAND_BUILTIN_VA_START
365 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
366 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
367 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
369 #undef TARGET_SCALAR_MODE_SUPPORTED_P
370 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
372 #undef TARGET_CANNOT_FORCE_CONST_MEM
373 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
375 #undef TARGET_SECONDARY_RELOAD
376 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
378 #undef TARGET_EXTRA_LIVE_ON_ENTRY
379 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
381 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
382 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
383 #undef TARGET_TRAMPOLINE_INIT
384 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
385 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
386 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
387 #undef TARGET_DELEGITIMIZE_ADDRESS
388 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
389 #undef TARGET_INTERNAL_ARG_POINTER
390 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
391 #undef TARGET_CAN_ELIMINATE
392 #define TARGET_CAN_ELIMINATE pa_can_eliminate
393 #undef TARGET_CONDITIONAL_REGISTER_USAGE
394 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
395 #undef TARGET_C_MODE_FOR_SUFFIX
396 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
397 #undef TARGET_ASM_FUNCTION_SECTION
398 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
400 struct gcc_target targetm = TARGET_INITIALIZER;
402 /* Parse the -mfixed-range= option string. */
405 fix_range (const char *const_str)
408 char *str, *dash, *comma;
410 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
411 REG2 are either register names or register numbers. The effect
412 of this option is to mark the registers in the range from REG1 to
413 REG2 as ``fixed'' so they won't be used by the compiler. This is
414 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
416 i = strlen (const_str);
417 str = (char *) alloca (i + 1);
418 memcpy (str, const_str, i + 1);
422 dash = strchr (str, '-');
425 warning (0, "value of -mfixed-range must have form REG1-REG2");
430 comma = strchr (dash + 1, ',');
434 first = decode_reg_name (str);
437 warning (0, "unknown register name: %s", str);
441 last = decode_reg_name (dash + 1);
444 warning (0, "unknown register name: %s", dash + 1);
452 warning (0, "%s-%s is an empty range", str, dash + 1);
456 for (i = first; i <= last; ++i)
457 fixed_regs[i] = call_used_regs[i] = 1;
466 /* Check if all floating point registers have been fixed. */
467 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
472 target_flags |= MASK_DISABLE_FPREGS;
475 /* Implement TARGET_HANDLE_OPTION. */
478 pa_handle_option (struct gcc_options *opts,
479 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
480 const struct cl_decoded_option *decoded,
481 location_t loc ATTRIBUTE_UNUSED)
483 size_t code = decoded->opt_index;
488 case OPT_mpa_risc_1_0:
490 opts->x_target_flags &= ~(MASK_PA_11 | MASK_PA_20);
494 case OPT_mpa_risc_1_1:
496 opts->x_target_flags &= ~MASK_PA_20;
497 opts->x_target_flags |= MASK_PA_11;
500 case OPT_mpa_risc_2_0:
502 opts->x_target_flags |= MASK_PA_11 | MASK_PA_20;
510 /* Implement the TARGET_OPTION_OVERRIDE hook. */
513 pa_option_override (void)
516 cl_deferred_option *opt;
517 VEC(cl_deferred_option,heap) *vec
518 = (VEC(cl_deferred_option,heap) *) pa_deferred_options;
520 FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
522 switch (opt->opt_index)
524 case OPT_mfixed_range_:
525 fix_range (opt->arg);
533 /* Unconditional branches in the delay slot are not compatible with dwarf2
534 call frame information. There is no benefit in using this optimization
535 on PA8000 and later processors. */
536 if (pa_cpu >= PROCESSOR_8000
537 || (targetm.except_unwind_info (&global_options) == UI_DWARF2
539 || flag_unwind_tables)
540 target_flags &= ~MASK_JUMP_IN_DELAY;
542 if (flag_pic && TARGET_PORTABLE_RUNTIME)
544 warning (0, "PIC code generation is not supported in the portable runtime model");
547 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
549 warning (0, "PIC code generation is not compatible with fast indirect calls");
552 if (! TARGET_GAS && write_symbols != NO_DEBUG)
554 warning (0, "-g is only supported when using GAS on this processor,");
555 warning (0, "-g option disabled");
556 write_symbols = NO_DEBUG;
559 /* We only support the "big PIC" model now. And we always generate PIC
560 code when in 64bit mode. */
561 if (flag_pic == 1 || TARGET_64BIT)
564 /* Disable -freorder-blocks-and-partition as we don't support hot and
565 cold partitioning. */
566 if (flag_reorder_blocks_and_partition)
568 inform (input_location,
569 "-freorder-blocks-and-partition does not work "
570 "on this architecture");
571 flag_reorder_blocks_and_partition = 0;
572 flag_reorder_blocks = 1;
575 /* We can't guarantee that .dword is available for 32-bit targets. */
576 if (UNITS_PER_WORD == 4)
577 targetm.asm_out.aligned_op.di = NULL;
579 /* The unaligned ops are only available when using GAS. */
582 targetm.asm_out.unaligned_op.hi = NULL;
583 targetm.asm_out.unaligned_op.si = NULL;
584 targetm.asm_out.unaligned_op.di = NULL;
587 init_machine_status = pa_init_machine_status;
592 PA_BUILTIN_COPYSIGNQ,
595 PA_BUILTIN_HUGE_VALQ,
599 static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
602 pa_init_builtins (void)
604 #ifdef DONT_HAVE_FPUTC_UNLOCKED
605 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
606 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
607 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
608 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
611 if (built_in_decls [BUILT_IN_FINITE])
612 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
613 if (built_in_decls [BUILT_IN_FINITEF])
614 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
617 if (HPUX_LONG_DOUBLE_LIBRARY)
621 /* Under HPUX, the __float128 type is a synonym for "long double". */
622 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
625 /* TFmode support builtins. */
626 ftype = build_function_type_list (long_double_type_node,
627 long_double_type_node,
629 decl = add_builtin_function ("__builtin_fabsq", ftype,
630 PA_BUILTIN_FABSQ, BUILT_IN_MD,
631 "_U_Qfabs", NULL_TREE);
632 TREE_READONLY (decl) = 1;
633 pa_builtins[PA_BUILTIN_FABSQ] = decl;
635 ftype = build_function_type_list (long_double_type_node,
636 long_double_type_node,
637 long_double_type_node,
639 decl = add_builtin_function ("__builtin_copysignq", ftype,
640 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
641 "_U_Qfcopysign", NULL_TREE);
642 TREE_READONLY (decl) = 1;
643 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
645 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
646 decl = add_builtin_function ("__builtin_infq", ftype,
647 PA_BUILTIN_INFQ, BUILT_IN_MD,
649 pa_builtins[PA_BUILTIN_INFQ] = decl;
651 decl = add_builtin_function ("__builtin_huge_valq", ftype,
652 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
654 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
659 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
660 enum machine_mode mode ATTRIBUTE_UNUSED,
661 int ignore ATTRIBUTE_UNUSED)
663 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
664 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
668 case PA_BUILTIN_FABSQ:
669 case PA_BUILTIN_COPYSIGNQ:
670 return expand_call (exp, target, ignore);
672 case PA_BUILTIN_INFQ:
673 case PA_BUILTIN_HUGE_VALQ:
675 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
680 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
682 tmp = validize_mem (force_const_mem (target_mode, tmp));
685 target = gen_reg_rtx (target_mode);
687 emit_move_insn (target, tmp);
698 /* Function to init struct machine_function.
699 This will be called, via a pointer variable,
700 from push_function_context. */
702 static struct machine_function *
703 pa_init_machine_status (void)
705 return ggc_alloc_cleared_machine_function ();
708 /* If FROM is a probable pointer register, mark TO as a probable
709 pointer register with the same pointer alignment as FROM. */
712 copy_reg_pointer (rtx to, rtx from)
714 if (REG_POINTER (from))
715 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
718 /* Return 1 if X contains a symbolic expression. We know these
719 expressions will have one of a few well defined forms, so
720 we need only check those forms. */
722 symbolic_expression_p (rtx x)
725 /* Strip off any HIGH. */
726 if (GET_CODE (x) == HIGH)
729 return (symbolic_operand (x, VOIDmode));
732 /* Accept any constant that can be moved in one instruction into a
735 cint_ok_for_move (HOST_WIDE_INT ival)
737 /* OK if ldo, ldil, or zdepi, can be used. */
738 return (VAL_14_BITS_P (ival)
739 || ldil_cint_p (ival)
740 || zdepi_cint_p (ival));
743 /* True iff ldil can be used to load this CONST_INT. The least
744 significant 11 bits of the value must be zero and the value must
745 not change sign when extended from 32 to 64 bits. */
747 ldil_cint_p (HOST_WIDE_INT ival)
749 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
751 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
754 /* True iff zdepi can be used to generate this CONST_INT.
755 zdepi first sign extends a 5-bit signed number to a given field
756 length, then places this field anywhere in a zero. */
758 zdepi_cint_p (unsigned HOST_WIDE_INT x)
760 unsigned HOST_WIDE_INT lsb_mask, t;
762 /* This might not be obvious, but it's at least fast.
763 This function is critical; we don't have the time loops would take. */
765 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
766 /* Return true iff t is a power of two. */
767 return ((t & (t - 1)) == 0);
770 /* True iff depi or extru can be used to compute (reg & mask).
771 Accept bit pattern like these:
776 and_mask_p (unsigned HOST_WIDE_INT mask)
779 mask += mask & -mask;
780 return (mask & (mask - 1)) == 0;
783 /* True iff depi can be used to compute (reg | MASK). */
785 ior_mask_p (unsigned HOST_WIDE_INT mask)
787 mask += mask & -mask;
788 return (mask & (mask - 1)) == 0;
791 /* Legitimize PIC addresses. If the address is already
792 position-independent, we return ORIG. Newly generated
793 position-independent addresses go to REG. If we need more
794 than one register, we lose. */
797 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
801 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
803 /* Labels need special handling. */
804 if (pic_label_operand (orig, mode))
808 /* We do not want to go through the movXX expanders here since that
809 would create recursion.
811 Nor do we really want to call a generator for a named pattern
812 since that requires multiple patterns if we want to support
815 So instead we just emit the raw set, which avoids the movXX
816 expanders completely. */
817 mark_reg_pointer (reg, BITS_PER_UNIT);
818 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
820 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
821 add_reg_note (insn, REG_EQUAL, orig);
823 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
824 and update LABEL_NUSES because this is not done automatically. */
825 if (reload_in_progress || reload_completed)
827 /* Extract LABEL_REF. */
828 if (GET_CODE (orig) == CONST)
829 orig = XEXP (XEXP (orig, 0), 0);
830 /* Extract CODE_LABEL. */
831 orig = XEXP (orig, 0);
832 add_reg_note (insn, REG_LABEL_OPERAND, orig);
833 LABEL_NUSES (orig)++;
835 crtl->uses_pic_offset_table = 1;
838 if (GET_CODE (orig) == SYMBOL_REF)
844 /* Before reload, allocate a temporary register for the intermediate
845 result. This allows the sequence to be deleted when the final
846 result is unused and the insns are trivially dead. */
847 tmp_reg = ((reload_in_progress || reload_completed)
848 ? reg : gen_reg_rtx (Pmode));
850 if (function_label_operand (orig, VOIDmode))
852 /* Force function label into memory in word mode. */
853 orig = XEXP (force_const_mem (word_mode, orig), 0);
854 /* Load plabel address from DLT. */
855 emit_move_insn (tmp_reg,
856 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
857 gen_rtx_HIGH (word_mode, orig)));
859 = gen_const_mem (Pmode,
860 gen_rtx_LO_SUM (Pmode, tmp_reg,
861 gen_rtx_UNSPEC (Pmode,
864 emit_move_insn (reg, pic_ref);
865 /* Now load address of function descriptor. */
866 pic_ref = gen_rtx_MEM (Pmode, reg);
870 /* Load symbol reference from DLT. */
871 emit_move_insn (tmp_reg,
872 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
873 gen_rtx_HIGH (word_mode, orig)));
875 = gen_const_mem (Pmode,
876 gen_rtx_LO_SUM (Pmode, tmp_reg,
877 gen_rtx_UNSPEC (Pmode,
882 crtl->uses_pic_offset_table = 1;
883 mark_reg_pointer (reg, BITS_PER_UNIT);
884 insn = emit_move_insn (reg, pic_ref);
886 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
887 set_unique_reg_note (insn, REG_EQUAL, orig);
891 else if (GET_CODE (orig) == CONST)
895 if (GET_CODE (XEXP (orig, 0)) == PLUS
896 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
900 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
902 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
903 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
904 base == reg ? 0 : reg);
906 if (GET_CODE (orig) == CONST_INT)
908 if (INT_14_BITS (orig))
909 return plus_constant (base, INTVAL (orig));
910 orig = force_reg (Pmode, orig);
912 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
913 /* Likewise, should we set special REG_NOTEs here? */
919 static GTY(()) rtx gen_tls_tga;
922 gen_tls_get_addr (void)
925 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
930 hppa_tls_call (rtx arg)
934 ret = gen_reg_rtx (Pmode);
935 emit_library_call_value (gen_tls_get_addr (), ret,
936 LCT_CONST, Pmode, 1, arg, Pmode);
942 legitimize_tls_address (rtx addr)
944 rtx ret, insn, tmp, t1, t2, tp;
945 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
949 case TLS_MODEL_GLOBAL_DYNAMIC:
950 tmp = gen_reg_rtx (Pmode);
952 emit_insn (gen_tgd_load_pic (tmp, addr));
954 emit_insn (gen_tgd_load (tmp, addr));
955 ret = hppa_tls_call (tmp);
958 case TLS_MODEL_LOCAL_DYNAMIC:
959 ret = gen_reg_rtx (Pmode);
960 tmp = gen_reg_rtx (Pmode);
963 emit_insn (gen_tld_load_pic (tmp, addr));
965 emit_insn (gen_tld_load (tmp, addr));
966 t1 = hppa_tls_call (tmp);
969 t2 = gen_reg_rtx (Pmode);
970 emit_libcall_block (insn, t2, t1,
971 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
973 emit_insn (gen_tld_offset_load (ret, addr, t2));
976 case TLS_MODEL_INITIAL_EXEC:
977 tp = gen_reg_rtx (Pmode);
978 tmp = gen_reg_rtx (Pmode);
979 ret = gen_reg_rtx (Pmode);
980 emit_insn (gen_tp_load (tp));
982 emit_insn (gen_tie_load_pic (tmp, addr));
984 emit_insn (gen_tie_load (tmp, addr));
985 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
988 case TLS_MODEL_LOCAL_EXEC:
989 tp = gen_reg_rtx (Pmode);
990 ret = gen_reg_rtx (Pmode);
991 emit_insn (gen_tp_load (tp));
992 emit_insn (gen_tle_load (ret, addr, tp));
1002 /* Try machine-dependent ways of modifying an illegitimate address
1003 to be legitimate. If we find one, return the new, valid address.
1004 This macro is used in only one place: `memory_address' in explow.c.
1006 OLDX is the address as it was before break_out_memory_refs was called.
1007 In some cases it is useful to look at this to decide what needs to be done.
1009 It is always safe for this macro to do nothing. It exists to recognize
1010 opportunities to optimize the output.
1012 For the PA, transform:
1014 memory(X + <large int>)
1018 if (<large int> & mask) >= 16
1019 Y = (<large int> & ~mask) + mask + 1 Round up.
1021 Y = (<large int> & ~mask) Round down.
1023 memory (Z + (<large int> - Y));
1025 This is for CSE to find several similar references, and only use one Z.
1027 X can either be a SYMBOL_REF or REG, but because combine cannot
1028 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1029 D will not fit in 14 bits.
1031 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1034 MODE_INT references allow displacements which fit in 14 bits, so use
1037 This relies on the fact that most mode MODE_FLOAT references will use FP
1038 registers and most mode MODE_INT references will use integer registers.
1039 (In the rare case of an FP register used in an integer MODE, we depend
1040 on secondary reloads to clean things up.)
1043 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1044 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1045 addressing modes to be used).
1047 Put X and Z into registers. Then put the entire expression into
1051 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1052 enum machine_mode mode)
1056 /* We need to canonicalize the order of operands in unscaled indexed
1057 addresses since the code that checks if an address is valid doesn't
1058 always try both orders. */
1059 if (!TARGET_NO_SPACE_REGS
1060 && GET_CODE (x) == PLUS
1061 && GET_MODE (x) == Pmode
1062 && REG_P (XEXP (x, 0))
1063 && REG_P (XEXP (x, 1))
1064 && REG_POINTER (XEXP (x, 0))
1065 && !REG_POINTER (XEXP (x, 1)))
1066 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1068 if (PA_SYMBOL_REF_TLS_P (x))
1069 return legitimize_tls_address (x);
1071 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1073 /* Strip off CONST. */
1074 if (GET_CODE (x) == CONST)
1077 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1078 That should always be safe. */
1079 if (GET_CODE (x) == PLUS
1080 && GET_CODE (XEXP (x, 0)) == REG
1081 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1083 rtx reg = force_reg (Pmode, XEXP (x, 1));
1084 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1087 /* Note we must reject symbols which represent function addresses
1088 since the assembler/linker can't handle arithmetic on plabels. */
1089 if (GET_CODE (x) == PLUS
1090 && GET_CODE (XEXP (x, 1)) == CONST_INT
1091 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1092 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1093 || GET_CODE (XEXP (x, 0)) == REG))
1095 rtx int_part, ptr_reg;
1097 int offset = INTVAL (XEXP (x, 1));
1100 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1101 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
1103 /* Choose which way to round the offset. Round up if we
1104 are >= halfway to the next boundary. */
1105 if ((offset & mask) >= ((mask + 1) / 2))
1106 newoffset = (offset & ~ mask) + mask + 1;
1108 newoffset = (offset & ~ mask);
1110 /* If the newoffset will not fit in 14 bits (ldo), then
1111 handling this would take 4 or 5 instructions (2 to load
1112 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1113 add the new offset and the SYMBOL_REF.) Combine can
1114 not handle 4->2 or 5->2 combinations, so do not create
1116 if (! VAL_14_BITS_P (newoffset)
1117 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1119 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1122 gen_rtx_HIGH (Pmode, const_part));
1125 gen_rtx_LO_SUM (Pmode,
1126 tmp_reg, const_part));
1130 if (! VAL_14_BITS_P (newoffset))
1131 int_part = force_reg (Pmode, GEN_INT (newoffset));
1133 int_part = GEN_INT (newoffset);
1135 ptr_reg = force_reg (Pmode,
1136 gen_rtx_PLUS (Pmode,
1137 force_reg (Pmode, XEXP (x, 0)),
1140 return plus_constant (ptr_reg, offset - newoffset);
1143 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1145 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1147 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1148 && (OBJECT_P (XEXP (x, 1))
1149 || GET_CODE (XEXP (x, 1)) == SUBREG)
1150 && GET_CODE (XEXP (x, 1)) != CONST)
1152 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1156 if (GET_CODE (reg1) != REG)
1157 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1159 reg2 = XEXP (XEXP (x, 0), 0);
1160 if (GET_CODE (reg2) != REG)
1161 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1163 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1164 gen_rtx_MULT (Pmode,
1170 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1172 Only do so for floating point modes since this is more speculative
1173 and we lose if it's an integer store. */
1174 if (GET_CODE (x) == PLUS
1175 && GET_CODE (XEXP (x, 0)) == PLUS
1176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1177 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1178 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1179 && (mode == SFmode || mode == DFmode))
1182 /* First, try and figure out what to use as a base register. */
1183 rtx reg1, reg2, base, idx;
1185 reg1 = XEXP (XEXP (x, 0), 1);
1190 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1191 then emit_move_sequence will turn on REG_POINTER so we'll know
1192 it's a base register below. */
1193 if (GET_CODE (reg1) != REG)
1194 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1196 if (GET_CODE (reg2) != REG)
1197 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1199 /* Figure out what the base and index are. */
1201 if (GET_CODE (reg1) == REG
1202 && REG_POINTER (reg1))
1205 idx = gen_rtx_PLUS (Pmode,
1206 gen_rtx_MULT (Pmode,
1207 XEXP (XEXP (XEXP (x, 0), 0), 0),
1208 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1211 else if (GET_CODE (reg2) == REG
1212 && REG_POINTER (reg2))
1221 /* If the index adds a large constant, try to scale the
1222 constant so that it can be loaded with only one insn. */
1223 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1224 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1225 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1226 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1228 /* Divide the CONST_INT by the scale factor, then add it to A. */
1229 int val = INTVAL (XEXP (idx, 1));
1231 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1232 reg1 = XEXP (XEXP (idx, 0), 0);
1233 if (GET_CODE (reg1) != REG)
1234 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1236 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1238 /* We can now generate a simple scaled indexed address. */
1241 (Pmode, gen_rtx_PLUS (Pmode,
1242 gen_rtx_MULT (Pmode, reg1,
1243 XEXP (XEXP (idx, 0), 1)),
1247 /* If B + C is still a valid base register, then add them. */
1248 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1249 && INTVAL (XEXP (idx, 1)) <= 4096
1250 && INTVAL (XEXP (idx, 1)) >= -4096)
1252 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1255 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1257 reg2 = XEXP (XEXP (idx, 0), 0);
1258 if (GET_CODE (reg2) != CONST_INT)
1259 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1261 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1262 gen_rtx_MULT (Pmode,
1268 /* Get the index into a register, then add the base + index and
1269 return a register holding the result. */
1271 /* First get A into a register. */
1272 reg1 = XEXP (XEXP (idx, 0), 0);
1273 if (GET_CODE (reg1) != REG)
1274 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1276 /* And get B into a register. */
1277 reg2 = XEXP (idx, 1);
1278 if (GET_CODE (reg2) != REG)
1279 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1281 reg1 = force_reg (Pmode,
1282 gen_rtx_PLUS (Pmode,
1283 gen_rtx_MULT (Pmode, reg1,
1284 XEXP (XEXP (idx, 0), 1)),
1287 /* Add the result to our base register and return. */
1288 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1292 /* Uh-oh. We might have an address for x[n-100000]. This needs
1293 special handling to avoid creating an indexed memory address
1294 with x-100000 as the base.
1296 If the constant part is small enough, then it's still safe because
1297 there is a guard page at the beginning and end of the data segment.
1299 Scaled references are common enough that we want to try and rearrange the
1300 terms so that we can use indexing for these addresses too. Only
1301 do the optimization for floatint point modes. */
1303 if (GET_CODE (x) == PLUS
1304 && symbolic_expression_p (XEXP (x, 1)))
1306 /* Ugly. We modify things here so that the address offset specified
1307 by the index expression is computed first, then added to x to form
1308 the entire address. */
1310 rtx regx1, regx2, regy1, regy2, y;
1312 /* Strip off any CONST. */
1314 if (GET_CODE (y) == CONST)
1317 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1319 /* See if this looks like
1320 (plus (mult (reg) (shadd_const))
1321 (const (plus (symbol_ref) (const_int))))
1323 Where const_int is small. In that case the const
1324 expression is a valid pointer for indexing.
1326 If const_int is big, but can be divided evenly by shadd_const
1327 and added to (reg). This allows more scaled indexed addresses. */
1328 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1329 && GET_CODE (XEXP (x, 0)) == MULT
1330 && GET_CODE (XEXP (y, 1)) == CONST_INT
1331 && INTVAL (XEXP (y, 1)) >= -4096
1332 && INTVAL (XEXP (y, 1)) <= 4095
1333 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1334 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1336 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1340 if (GET_CODE (reg1) != REG)
1341 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1343 reg2 = XEXP (XEXP (x, 0), 0);
1344 if (GET_CODE (reg2) != REG)
1345 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1347 return force_reg (Pmode,
1348 gen_rtx_PLUS (Pmode,
1349 gen_rtx_MULT (Pmode,
1354 else if ((mode == DFmode || mode == SFmode)
1355 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1356 && GET_CODE (XEXP (x, 0)) == MULT
1357 && GET_CODE (XEXP (y, 1)) == CONST_INT
1358 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1359 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1360 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1363 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1364 / INTVAL (XEXP (XEXP (x, 0), 1))));
1365 regx2 = XEXP (XEXP (x, 0), 0);
1366 if (GET_CODE (regx2) != REG)
1367 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1368 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1372 gen_rtx_PLUS (Pmode,
1373 gen_rtx_MULT (Pmode, regx2,
1374 XEXP (XEXP (x, 0), 1)),
1375 force_reg (Pmode, XEXP (y, 0))));
1377 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1378 && INTVAL (XEXP (y, 1)) >= -4096
1379 && INTVAL (XEXP (y, 1)) <= 4095)
1381 /* This is safe because of the guard page at the
1382 beginning and end of the data space. Just
1383 return the original address. */
1388 /* Doesn't look like one we can optimize. */
1389 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1390 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1391 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1392 regx1 = force_reg (Pmode,
1393 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1395 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1403 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1405 Compute extra cost of moving data between one register class
1408 Make moves from SAR so expensive they should never happen. We used to
1409 have 0xffff here, but that generates overflow in rare cases.
1411 Copies involving a FP register and a non-FP register are relatively
1412 expensive because they must go through memory.
1414 Other copies are reasonably cheap. */
1417 hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
1418 reg_class_t from, reg_class_t to)
1420 if (from == SHIFT_REGS)
1422 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1424 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1425 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1431 /* For the HPPA, REG and REG+CONST is cost 0
1432 and addresses involving symbolic constants are cost 2.
1434 PIC addresses are very expensive.
1436 It is no coincidence that this has the same structure
1437 as GO_IF_LEGITIMATE_ADDRESS. */
1440 hppa_address_cost (rtx X,
1441 bool speed ATTRIBUTE_UNUSED)
1443 switch (GET_CODE (X))
1456 /* Compute a (partial) cost for rtx X. Return true if the complete
1457 cost has been computed, and false if subexpressions should be
1458 scanned. In either case, *TOTAL contains the cost result. */
1461 hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1462 bool speed ATTRIBUTE_UNUSED)
1467 if (INTVAL (x) == 0)
1469 else if (INT_14_BITS (x))
1486 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1487 && outer_code != SET)
1494 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1495 *total = COSTS_N_INSNS (3);
1496 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1497 *total = COSTS_N_INSNS (8);
1499 *total = COSTS_N_INSNS (20);
1503 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1505 *total = COSTS_N_INSNS (14);
1513 *total = COSTS_N_INSNS (60);
1516 case PLUS: /* this includes shNadd insns */
1518 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1519 *total = COSTS_N_INSNS (3);
1521 *total = COSTS_N_INSNS (1);
1527 *total = COSTS_N_INSNS (1);
1535 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1536 new rtx with the correct mode. */
1538 force_mode (enum machine_mode mode, rtx orig)
1540 if (mode == GET_MODE (orig))
1543 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1545 return gen_rtx_REG (mode, REGNO (orig));
1548 /* Return 1 if *X is a thread-local symbol. */
1551 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1553 return PA_SYMBOL_REF_TLS_P (*x);
1556 /* Return 1 if X contains a thread-local symbol. */
1559 pa_tls_referenced_p (rtx x)
1561 if (!TARGET_HAVE_TLS)
1564 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1567 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1570 pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1572 return pa_tls_referenced_p (x);
1575 /* Emit insns to move operands[1] into operands[0].
1577 Return 1 if we have written out everything that needs to be done to
1578 do the move. Otherwise, return 0 and the caller will emit the move
1581 Note SCRATCH_REG may not be in the proper mode depending on how it
1582 will be used. This routine is responsible for creating a new copy
1583 of SCRATCH_REG in the proper mode. */
1586 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1588 register rtx operand0 = operands[0];
1589 register rtx operand1 = operands[1];
1592 /* We can only handle indexed addresses in the destination operand
1593 of floating point stores. Thus, we need to break out indexed
1594 addresses from the destination operand. */
1595 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1597 gcc_assert (can_create_pseudo_p ());
1599 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1600 operand0 = replace_equiv_address (operand0, tem);
1603 /* On targets with non-equivalent space registers, break out unscaled
1604 indexed addresses from the source operand before the final CSE.
1605 We have to do this because the REG_POINTER flag is not correctly
1606 carried through various optimization passes and CSE may substitute
1607 a pseudo without the pointer set for one with the pointer set. As
1608 a result, we loose various opportunities to create insns with
1609 unscaled indexed addresses. */
1610 if (!TARGET_NO_SPACE_REGS
1611 && !cse_not_expected
1612 && GET_CODE (operand1) == MEM
1613 && GET_CODE (XEXP (operand1, 0)) == PLUS
1614 && REG_P (XEXP (XEXP (operand1, 0), 0))
1615 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1617 = replace_equiv_address (operand1,
1618 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1621 && reload_in_progress && GET_CODE (operand0) == REG
1622 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1623 operand0 = reg_equiv_mem (REGNO (operand0));
1624 else if (scratch_reg
1625 && reload_in_progress && GET_CODE (operand0) == SUBREG
1626 && GET_CODE (SUBREG_REG (operand0)) == REG
1627 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1629 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1630 the code which tracks sets/uses for delete_output_reload. */
1631 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1632 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1633 SUBREG_BYTE (operand0));
1634 operand0 = alter_subreg (&temp);
1638 && reload_in_progress && GET_CODE (operand1) == REG
1639 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1640 operand1 = reg_equiv_mem (REGNO (operand1));
1641 else if (scratch_reg
1642 && reload_in_progress && GET_CODE (operand1) == SUBREG
1643 && GET_CODE (SUBREG_REG (operand1)) == REG
1644 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1646 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1647 the code which tracks sets/uses for delete_output_reload. */
1648 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1649 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1650 SUBREG_BYTE (operand1));
1651 operand1 = alter_subreg (&temp);
1654 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1655 && ((tem = find_replacement (&XEXP (operand0, 0)))
1656 != XEXP (operand0, 0)))
1657 operand0 = replace_equiv_address (operand0, tem);
1659 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1660 && ((tem = find_replacement (&XEXP (operand1, 0)))
1661 != XEXP (operand1, 0)))
1662 operand1 = replace_equiv_address (operand1, tem);
1664 /* Handle secondary reloads for loads/stores of FP registers from
1665 REG+D addresses where D does not fit in 5 or 14 bits, including
1666 (subreg (mem (addr))) cases. */
1668 && fp_reg_operand (operand0, mode)
1669 && ((GET_CODE (operand1) == MEM
1670 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1671 XEXP (operand1, 0)))
1672 || ((GET_CODE (operand1) == SUBREG
1673 && GET_CODE (XEXP (operand1, 0)) == MEM
1674 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1676 XEXP (XEXP (operand1, 0), 0))))))
1678 if (GET_CODE (operand1) == SUBREG)
1679 operand1 = XEXP (operand1, 0);
1681 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1682 it in WORD_MODE regardless of what mode it was originally given
1684 scratch_reg = force_mode (word_mode, scratch_reg);
1686 /* D might not fit in 14 bits either; for such cases load D into
1688 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1690 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1691 emit_move_insn (scratch_reg,
1692 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1694 XEXP (XEXP (operand1, 0), 0),
1698 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1699 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1700 replace_equiv_address (operand1, scratch_reg)));
1703 else if (scratch_reg
1704 && fp_reg_operand (operand1, mode)
1705 && ((GET_CODE (operand0) == MEM
1706 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1708 XEXP (operand0, 0)))
1709 || ((GET_CODE (operand0) == SUBREG)
1710 && GET_CODE (XEXP (operand0, 0)) == MEM
1711 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1713 XEXP (XEXP (operand0, 0), 0)))))
1715 if (GET_CODE (operand0) == SUBREG)
1716 operand0 = XEXP (operand0, 0);
1718 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1719 it in WORD_MODE regardless of what mode it was originally given
1721 scratch_reg = force_mode (word_mode, scratch_reg);
1723 /* D might not fit in 14 bits either; for such cases load D into
1725 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1727 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1728 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1731 XEXP (XEXP (operand0, 0),
1736 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1737 emit_insn (gen_rtx_SET (VOIDmode,
1738 replace_equiv_address (operand0, scratch_reg),
1742 /* Handle secondary reloads for loads of FP registers from constant
1743 expressions by forcing the constant into memory.
1745 Use scratch_reg to hold the address of the memory location.
1747 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
1748 NO_REGS when presented with a const_int and a register class
1749 containing only FP registers. Doing so unfortunately creates
1750 more problems than it solves. Fix this for 2.5. */
1751 else if (scratch_reg
1752 && CONSTANT_P (operand1)
1753 && fp_reg_operand (operand0, mode))
1755 rtx const_mem, xoperands[2];
1757 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1758 it in WORD_MODE regardless of what mode it was originally given
1760 scratch_reg = force_mode (word_mode, scratch_reg);
1762 /* Force the constant into memory and put the address of the
1763 memory location into scratch_reg. */
1764 const_mem = force_const_mem (mode, operand1);
1765 xoperands[0] = scratch_reg;
1766 xoperands[1] = XEXP (const_mem, 0);
1767 emit_move_sequence (xoperands, Pmode, 0);
1769 /* Now load the destination register. */
1770 emit_insn (gen_rtx_SET (mode, operand0,
1771 replace_equiv_address (const_mem, scratch_reg)));
1774 /* Handle secondary reloads for SAR. These occur when trying to load
1775 the SAR from memory or a constant. */
1776 else if (scratch_reg
1777 && GET_CODE (operand0) == REG
1778 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1779 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1780 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1782 /* D might not fit in 14 bits either; for such cases load D into
1784 if (GET_CODE (operand1) == MEM
1785 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1787 /* We are reloading the address into the scratch register, so we
1788 want to make sure the scratch register is a full register. */
1789 scratch_reg = force_mode (word_mode, scratch_reg);
1791 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1792 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1795 XEXP (XEXP (operand1, 0),
1799 /* Now we are going to load the scratch register from memory,
1800 we want to load it in the same width as the original MEM,
1801 which must be the same as the width of the ultimate destination,
1803 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1805 emit_move_insn (scratch_reg,
1806 replace_equiv_address (operand1, scratch_reg));
1810 /* We want to load the scratch register using the same mode as
1811 the ultimate destination. */
1812 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1814 emit_move_insn (scratch_reg, operand1);
1817 /* And emit the insn to set the ultimate destination. We know that
1818 the scratch register has the same mode as the destination at this
1820 emit_move_insn (operand0, scratch_reg);
1823 /* Handle the most common case: storing into a register. */
1824 else if (register_operand (operand0, mode))
1826 if (register_operand (operand1, mode)
1827 || (GET_CODE (operand1) == CONST_INT
1828 && cint_ok_for_move (INTVAL (operand1)))
1829 || (operand1 == CONST0_RTX (mode))
1830 || (GET_CODE (operand1) == HIGH
1831 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1832 /* Only `general_operands' can come here, so MEM is ok. */
1833 || GET_CODE (operand1) == MEM)
1835 /* Various sets are created during RTL generation which don't
1836 have the REG_POINTER flag correctly set. After the CSE pass,
1837 instruction recognition can fail if we don't consistently
1838 set this flag when performing register copies. This should
1839 also improve the opportunities for creating insns that use
1840 unscaled indexing. */
1841 if (REG_P (operand0) && REG_P (operand1))
1843 if (REG_POINTER (operand1)
1844 && !REG_POINTER (operand0)
1845 && !HARD_REGISTER_P (operand0))
1846 copy_reg_pointer (operand0, operand1);
1849 /* When MEMs are broken out, the REG_POINTER flag doesn't
1850 get set. In some cases, we can set the REG_POINTER flag
1851 from the declaration for the MEM. */
1852 if (REG_P (operand0)
1853 && GET_CODE (operand1) == MEM
1854 && !REG_POINTER (operand0))
1856 tree decl = MEM_EXPR (operand1);
1858 /* Set the register pointer flag and register alignment
1859 if the declaration for this memory reference is a
1865 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1867 if (TREE_CODE (decl) == COMPONENT_REF)
1868 decl = TREE_OPERAND (decl, 1);
1870 type = TREE_TYPE (decl);
1871 type = strip_array_types (type);
1873 if (POINTER_TYPE_P (type))
1877 type = TREE_TYPE (type);
1878 /* Using TYPE_ALIGN_OK is rather conservative as
1879 only the ada frontend actually sets it. */
1880 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1882 mark_reg_pointer (operand0, align);
1887 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1891 else if (GET_CODE (operand0) == MEM)
1893 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1894 && !(reload_in_progress || reload_completed))
1896 rtx temp = gen_reg_rtx (DFmode);
1898 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1899 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1902 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1904 /* Run this case quickly. */
1905 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1908 if (! (reload_in_progress || reload_completed))
1910 operands[0] = validize_mem (operand0);
1911 operands[1] = operand1 = force_reg (mode, operand1);
1915 /* Simplify the source if we need to.
1916 Note we do have to handle function labels here, even though we do
1917 not consider them legitimate constants. Loop optimizations can
1918 call the emit_move_xxx with one as a source. */
1919 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1920 || function_label_operand (operand1, VOIDmode)
1921 || (GET_CODE (operand1) == HIGH
1922 && symbolic_operand (XEXP (operand1, 0), mode)))
1926 if (GET_CODE (operand1) == HIGH)
1929 operand1 = XEXP (operand1, 0);
1931 if (symbolic_operand (operand1, mode))
1933 /* Argh. The assembler and linker can't handle arithmetic
1936 So we force the plabel into memory, load operand0 from
1937 the memory location, then add in the constant part. */
1938 if ((GET_CODE (operand1) == CONST
1939 && GET_CODE (XEXP (operand1, 0)) == PLUS
1940 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1942 || function_label_operand (operand1, VOIDmode))
1944 rtx temp, const_part;
1946 /* Figure out what (if any) scratch register to use. */
1947 if (reload_in_progress || reload_completed)
1949 scratch_reg = scratch_reg ? scratch_reg : operand0;
1950 /* SCRATCH_REG will hold an address and maybe the actual
1951 data. We want it in WORD_MODE regardless of what mode it
1952 was originally given to us. */
1953 scratch_reg = force_mode (word_mode, scratch_reg);
1956 scratch_reg = gen_reg_rtx (Pmode);
1958 if (GET_CODE (operand1) == CONST)
1960 /* Save away the constant part of the expression. */
1961 const_part = XEXP (XEXP (operand1, 0), 1);
1962 gcc_assert (GET_CODE (const_part) == CONST_INT);
1964 /* Force the function label into memory. */
1965 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1969 /* No constant part. */
1970 const_part = NULL_RTX;
1972 /* Force the function label into memory. */
1973 temp = force_const_mem (mode, operand1);
1977 /* Get the address of the memory location. PIC-ify it if
1979 temp = XEXP (temp, 0);
1981 temp = legitimize_pic_address (temp, mode, scratch_reg);
1983 /* Put the address of the memory location into our destination
1986 emit_move_sequence (operands, mode, scratch_reg);
1988 /* Now load from the memory location into our destination
1990 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1991 emit_move_sequence (operands, mode, scratch_reg);
1993 /* And add back in the constant part. */
1994 if (const_part != NULL_RTX)
1995 expand_inc (operand0, const_part);
2004 if (reload_in_progress || reload_completed)
2006 temp = scratch_reg ? scratch_reg : operand0;
2007 /* TEMP will hold an address and maybe the actual
2008 data. We want it in WORD_MODE regardless of what mode it
2009 was originally given to us. */
2010 temp = force_mode (word_mode, temp);
2013 temp = gen_reg_rtx (Pmode);
2015 /* (const (plus (symbol) (const_int))) must be forced to
2016 memory during/after reload if the const_int will not fit
2018 if (GET_CODE (operand1) == CONST
2019 && GET_CODE (XEXP (operand1, 0)) == PLUS
2020 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2021 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
2022 && (reload_completed || reload_in_progress)
2025 rtx const_mem = force_const_mem (mode, operand1);
2026 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
2028 operands[1] = replace_equiv_address (const_mem, operands[1]);
2029 emit_move_sequence (operands, mode, temp);
2033 operands[1] = legitimize_pic_address (operand1, mode, temp);
2034 if (REG_P (operand0) && REG_P (operands[1]))
2035 copy_reg_pointer (operand0, operands[1]);
2036 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
2039 /* On the HPPA, references to data space are supposed to use dp,
2040 register 27, but showing it in the RTL inhibits various cse
2041 and loop optimizations. */
2046 if (reload_in_progress || reload_completed)
2048 temp = scratch_reg ? scratch_reg : operand0;
2049 /* TEMP will hold an address and maybe the actual
2050 data. We want it in WORD_MODE regardless of what mode it
2051 was originally given to us. */
2052 temp = force_mode (word_mode, temp);
2055 temp = gen_reg_rtx (mode);
2057 /* Loading a SYMBOL_REF into a register makes that register
2058 safe to be used as the base in an indexed address.
2060 Don't mark hard registers though. That loses. */
2061 if (GET_CODE (operand0) == REG
2062 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2063 mark_reg_pointer (operand0, BITS_PER_UNIT);
2064 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2065 mark_reg_pointer (temp, BITS_PER_UNIT);
2068 set = gen_rtx_SET (mode, operand0, temp);
2070 set = gen_rtx_SET (VOIDmode,
2072 gen_rtx_LO_SUM (mode, temp, operand1));
2074 emit_insn (gen_rtx_SET (VOIDmode,
2076 gen_rtx_HIGH (mode, operand1)));
2082 else if (pa_tls_referenced_p (operand1))
2087 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2089 addend = XEXP (XEXP (tmp, 0), 1);
2090 tmp = XEXP (XEXP (tmp, 0), 0);
2093 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2094 tmp = legitimize_tls_address (tmp);
2097 tmp = gen_rtx_PLUS (mode, tmp, addend);
2098 tmp = force_operand (tmp, operands[0]);
2102 else if (GET_CODE (operand1) != CONST_INT
2103 || !cint_ok_for_move (INTVAL (operand1)))
2107 HOST_WIDE_INT value = 0;
2108 HOST_WIDE_INT insv = 0;
2111 if (GET_CODE (operand1) == CONST_INT)
2112 value = INTVAL (operand1);
2115 && GET_CODE (operand1) == CONST_INT
2116 && HOST_BITS_PER_WIDE_INT > 32
2117 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2121 /* Extract the low order 32 bits of the value and sign extend.
2122 If the new value is the same as the original value, we can
2123 can use the original value as-is. If the new value is
2124 different, we use it and insert the most-significant 32-bits
2125 of the original value into the final result. */
2126 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2127 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2130 #if HOST_BITS_PER_WIDE_INT > 32
2131 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2135 operand1 = GEN_INT (nval);
2139 if (reload_in_progress || reload_completed)
2140 temp = scratch_reg ? scratch_reg : operand0;
2142 temp = gen_reg_rtx (mode);
2144 /* We don't directly split DImode constants on 32-bit targets
2145 because PLUS uses an 11-bit immediate and the insn sequence
2146 generated is not as efficient as the one using HIGH/LO_SUM. */
2147 if (GET_CODE (operand1) == CONST_INT
2148 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2149 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2152 /* Directly break constant into high and low parts. This
2153 provides better optimization opportunities because various
2154 passes recognize constants split with PLUS but not LO_SUM.
2155 We use a 14-bit signed low part except when the addition
2156 of 0x4000 to the high part might change the sign of the
2158 HOST_WIDE_INT low = value & 0x3fff;
2159 HOST_WIDE_INT high = value & ~ 0x3fff;
2163 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2171 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2172 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2176 emit_insn (gen_rtx_SET (VOIDmode, temp,
2177 gen_rtx_HIGH (mode, operand1)));
2178 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2181 insn = emit_move_insn (operands[0], operands[1]);
2183 /* Now insert the most significant 32 bits of the value
2184 into the register. When we don't have a second register
2185 available, it could take up to nine instructions to load
2186 a 64-bit integer constant. Prior to reload, we force
2187 constants that would take more than three instructions
2188 to load to the constant pool. During and after reload,
2189 we have to handle all possible values. */
2192 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2193 register and the value to be inserted is outside the
2194 range that can be loaded with three depdi instructions. */
2195 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2197 operand1 = GEN_INT (insv);
2199 emit_insn (gen_rtx_SET (VOIDmode, temp,
2200 gen_rtx_HIGH (mode, operand1)));
2201 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2202 emit_insn (gen_insv (operand0, GEN_INT (32),
2207 int len = 5, pos = 27;
2209 /* Insert the bits using the depdi instruction. */
2212 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2213 HOST_WIDE_INT sign = v5 < 0;
2215 /* Left extend the insertion. */
2216 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2217 while (pos > 0 && (insv & 1) == sign)
2219 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2224 emit_insn (gen_insv (operand0, GEN_INT (len),
2225 GEN_INT (pos), GEN_INT (v5)));
2227 len = pos > 0 && pos < 5 ? pos : 5;
2233 set_unique_reg_note (insn, REG_EQUAL, op1);
2238 /* Now have insn-emit do whatever it normally does. */
2242 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2243 it will need a link/runtime reloc). */
2246 reloc_needed (tree exp)
2250 switch (TREE_CODE (exp))
2255 case POINTER_PLUS_EXPR:
2258 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2259 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2263 case NON_LVALUE_EXPR:
2264 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2270 unsigned HOST_WIDE_INT ix;
2272 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2274 reloc |= reloc_needed (value);
2288 /* Return the best assembler insn template
2289 for moving operands[1] into operands[0] as a fullword. */
2291 singlemove_string (rtx *operands)
2293 HOST_WIDE_INT intval;
2295 if (GET_CODE (operands[0]) == MEM)
2296 return "stw %r1,%0";
2297 if (GET_CODE (operands[1]) == MEM)
2299 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2304 gcc_assert (GET_MODE (operands[1]) == SFmode);
2306 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2308 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2309 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2311 operands[1] = GEN_INT (i);
2312 /* Fall through to CONST_INT case. */
2314 if (GET_CODE (operands[1]) == CONST_INT)
2316 intval = INTVAL (operands[1]);
2318 if (VAL_14_BITS_P (intval))
2320 else if ((intval & 0x7ff) == 0)
2321 return "ldil L'%1,%0";
2322 else if (zdepi_cint_p (intval))
2323 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2325 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2327 return "copy %1,%0";
2331 /* Compute position (in OP[1]) and width (in OP[2])
2332 useful for copying IMM to a register using the zdepi
2333 instructions. Store the immediate value to insert in OP[0]. */
2335 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2339 /* Find the least significant set bit in IMM. */
2340 for (lsb = 0; lsb < 32; lsb++)
2347 /* Choose variants based on *sign* of the 5-bit field. */
2348 if ((imm & 0x10) == 0)
2349 len = (lsb <= 28) ? 4 : 32 - lsb;
2352 /* Find the width of the bitstring in IMM. */
2353 for (len = 5; len < 32 - lsb; len++)
2355 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2359 /* Sign extend IMM as a 5-bit value. */
2360 imm = (imm & 0xf) - 0x10;
2368 /* Compute position (in OP[1]) and width (in OP[2])
2369 useful for copying IMM to a register using the depdi,z
2370 instructions. Store the immediate value to insert in OP[0]. */
2372 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2374 int lsb, len, maxlen;
2376 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2378 /* Find the least significant set bit in IMM. */
2379 for (lsb = 0; lsb < maxlen; lsb++)
2386 /* Choose variants based on *sign* of the 5-bit field. */
2387 if ((imm & 0x10) == 0)
2388 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2391 /* Find the width of the bitstring in IMM. */
2392 for (len = 5; len < maxlen - lsb; len++)
2394 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2398 /* Extend length if host is narrow and IMM is negative. */
2399 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2402 /* Sign extend IMM as a 5-bit value. */
2403 imm = (imm & 0xf) - 0x10;
2411 /* Output assembler code to perform a doubleword move insn
2412 with operands OPERANDS. */
2415 output_move_double (rtx *operands)
2417 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2419 rtx addreg0 = 0, addreg1 = 0;
2421 /* First classify both operands. */
2423 if (REG_P (operands[0]))
2425 else if (offsettable_memref_p (operands[0]))
2427 else if (GET_CODE (operands[0]) == MEM)
2432 if (REG_P (operands[1]))
2434 else if (CONSTANT_P (operands[1]))
2436 else if (offsettable_memref_p (operands[1]))
2438 else if (GET_CODE (operands[1]) == MEM)
2443 /* Check for the cases that the operand constraints are not
2444 supposed to allow to happen. */
2445 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2447 /* Handle copies between general and floating registers. */
2449 if (optype0 == REGOP && optype1 == REGOP
2450 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2452 if (FP_REG_P (operands[0]))
2454 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2455 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2456 return "{fldds|fldd} -16(%%sp),%0";
2460 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2461 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2462 return "{ldws|ldw} -12(%%sp),%R0";
2466 /* Handle auto decrementing and incrementing loads and stores
2467 specifically, since the structure of the function doesn't work
2468 for them without major modification. Do it better when we learn
2469 this port about the general inc/dec addressing of PA.
2470 (This was written by tege. Chide him if it doesn't work.) */
2472 if (optype0 == MEMOP)
2474 /* We have to output the address syntax ourselves, since print_operand
2475 doesn't deal with the addresses we want to use. Fix this later. */
2477 rtx addr = XEXP (operands[0], 0);
2478 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2480 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2482 operands[0] = XEXP (addr, 0);
2483 gcc_assert (GET_CODE (operands[1]) == REG
2484 && GET_CODE (operands[0]) == REG);
2486 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2488 /* No overlap between high target register and address
2489 register. (We do this in a non-obvious way to
2490 save a register file writeback) */
2491 if (GET_CODE (addr) == POST_INC)
2492 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2493 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2495 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2497 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2499 operands[0] = XEXP (addr, 0);
2500 gcc_assert (GET_CODE (operands[1]) == REG
2501 && GET_CODE (operands[0]) == REG);
2503 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2504 /* No overlap between high target register and address
2505 register. (We do this in a non-obvious way to save a
2506 register file writeback) */
2507 if (GET_CODE (addr) == PRE_INC)
2508 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2509 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2512 if (optype1 == MEMOP)
2514 /* We have to output the address syntax ourselves, since print_operand
2515 doesn't deal with the addresses we want to use. Fix this later. */
2517 rtx addr = XEXP (operands[1], 0);
2518 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2520 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2522 operands[1] = XEXP (addr, 0);
2523 gcc_assert (GET_CODE (operands[0]) == REG
2524 && GET_CODE (operands[1]) == REG);
2526 if (!reg_overlap_mentioned_p (high_reg, addr))
2528 /* No overlap between high target register and address
2529 register. (We do this in a non-obvious way to
2530 save a register file writeback) */
2531 if (GET_CODE (addr) == POST_INC)
2532 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2533 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2537 /* This is an undefined situation. We should load into the
2538 address register *and* update that register. Probably
2539 we don't need to handle this at all. */
2540 if (GET_CODE (addr) == POST_INC)
2541 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2542 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2545 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2547 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2549 operands[1] = XEXP (addr, 0);
2550 gcc_assert (GET_CODE (operands[0]) == REG
2551 && GET_CODE (operands[1]) == REG);
2553 if (!reg_overlap_mentioned_p (high_reg, addr))
2555 /* No overlap between high target register and address
2556 register. (We do this in a non-obvious way to
2557 save a register file writeback) */
2558 if (GET_CODE (addr) == PRE_INC)
2559 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2560 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2564 /* This is an undefined situation. We should load into the
2565 address register *and* update that register. Probably
2566 we don't need to handle this at all. */
2567 if (GET_CODE (addr) == PRE_INC)
2568 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2569 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2572 else if (GET_CODE (addr) == PLUS
2573 && GET_CODE (XEXP (addr, 0)) == MULT)
2576 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2578 if (!reg_overlap_mentioned_p (high_reg, addr))
2580 xoperands[0] = high_reg;
2581 xoperands[1] = XEXP (addr, 1);
2582 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2583 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2584 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2586 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2590 xoperands[0] = high_reg;
2591 xoperands[1] = XEXP (addr, 1);
2592 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2593 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2594 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2596 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2601 /* If an operand is an unoffsettable memory ref, find a register
2602 we can increment temporarily to make it refer to the second word. */
2604 if (optype0 == MEMOP)
2605 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2607 if (optype1 == MEMOP)
2608 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2610 /* Ok, we can do one word at a time.
2611 Normally we do the low-numbered word first.
2613 In either case, set up in LATEHALF the operands to use
2614 for the high-numbered word and in some cases alter the
2615 operands in OPERANDS to be suitable for the low-numbered word. */
2617 if (optype0 == REGOP)
2618 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2619 else if (optype0 == OFFSOP)
2620 latehalf[0] = adjust_address (operands[0], SImode, 4);
2622 latehalf[0] = operands[0];
2624 if (optype1 == REGOP)
2625 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2626 else if (optype1 == OFFSOP)
2627 latehalf[1] = adjust_address (operands[1], SImode, 4);
2628 else if (optype1 == CNSTOP)
2629 split_double (operands[1], &operands[1], &latehalf[1]);
2631 latehalf[1] = operands[1];
2633 /* If the first move would clobber the source of the second one,
2634 do them in the other order.
2636 This can happen in two cases:
2638 mem -> register where the first half of the destination register
2639 is the same register used in the memory's address. Reload
2640 can create such insns.
2642 mem in this case will be either register indirect or register
2643 indirect plus a valid offset.
2645 register -> register move where REGNO(dst) == REGNO(src + 1)
2646 someone (Tim/Tege?) claimed this can happen for parameter loads.
2648 Handle mem -> register case first. */
2649 if (optype0 == REGOP
2650 && (optype1 == MEMOP || optype1 == OFFSOP)
2651 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2654 /* Do the late half first. */
2656 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2657 output_asm_insn (singlemove_string (latehalf), latehalf);
2661 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2662 return singlemove_string (operands);
2665 /* Now handle register -> register case. */
2666 if (optype0 == REGOP && optype1 == REGOP
2667 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2669 output_asm_insn (singlemove_string (latehalf), latehalf);
2670 return singlemove_string (operands);
2673 /* Normal case: do the two words, low-numbered first. */
2675 output_asm_insn (singlemove_string (operands), operands);
2677 /* Make any unoffsettable addresses point at high-numbered word. */
2679 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2681 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2684 output_asm_insn (singlemove_string (latehalf), latehalf);
2686 /* Undo the adds we just did. */
2688 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2690 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2696 output_fp_move_double (rtx *operands)
2698 if (FP_REG_P (operands[0]))
2700 if (FP_REG_P (operands[1])
2701 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2702 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2704 output_asm_insn ("fldd%F1 %1,%0", operands);
2706 else if (FP_REG_P (operands[1]))
2708 output_asm_insn ("fstd%F0 %1,%0", operands);
2714 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2716 /* This is a pain. You have to be prepared to deal with an
2717 arbitrary address here including pre/post increment/decrement.
2719 so avoid this in the MD. */
2720 gcc_assert (GET_CODE (operands[0]) == REG);
2722 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2723 xoperands[0] = operands[0];
2724 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2729 /* Return a REG that occurs in ADDR with coefficient 1.
2730 ADDR can be effectively incremented by incrementing REG. */
2733 find_addr_reg (rtx addr)
2735 while (GET_CODE (addr) == PLUS)
2737 if (GET_CODE (XEXP (addr, 0)) == REG)
2738 addr = XEXP (addr, 0);
2739 else if (GET_CODE (XEXP (addr, 1)) == REG)
2740 addr = XEXP (addr, 1);
2741 else if (CONSTANT_P (XEXP (addr, 0)))
2742 addr = XEXP (addr, 1);
2743 else if (CONSTANT_P (XEXP (addr, 1)))
2744 addr = XEXP (addr, 0);
2748 gcc_assert (GET_CODE (addr) == REG);
2752 /* Emit code to perform a block move.
2754 OPERANDS[0] is the destination pointer as a REG, clobbered.
2755 OPERANDS[1] is the source pointer as a REG, clobbered.
2756 OPERANDS[2] is a register for temporary storage.
2757 OPERANDS[3] is a register for temporary storage.
2758 OPERANDS[4] is the size as a CONST_INT
2759 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2760 OPERANDS[6] is another temporary register. */
2763 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2765 int align = INTVAL (operands[5]);
2766 unsigned long n_bytes = INTVAL (operands[4]);
2768 /* We can't move more than a word at a time because the PA
2769 has no longer integer move insns. (Could use fp mem ops?) */
2770 if (align > (TARGET_64BIT ? 8 : 4))
2771 align = (TARGET_64BIT ? 8 : 4);
2773 /* Note that we know each loop below will execute at least twice
2774 (else we would have open-coded the copy). */
2778 /* Pre-adjust the loop counter. */
2779 operands[4] = GEN_INT (n_bytes - 16);
2780 output_asm_insn ("ldi %4,%2", operands);
2783 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2784 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2785 output_asm_insn ("std,ma %3,8(%0)", operands);
2786 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2787 output_asm_insn ("std,ma %6,8(%0)", operands);
2789 /* Handle the residual. There could be up to 7 bytes of
2790 residual to copy! */
2791 if (n_bytes % 16 != 0)
2793 operands[4] = GEN_INT (n_bytes % 8);
2794 if (n_bytes % 16 >= 8)
2795 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2796 if (n_bytes % 8 != 0)
2797 output_asm_insn ("ldd 0(%1),%6", operands);
2798 if (n_bytes % 16 >= 8)
2799 output_asm_insn ("std,ma %3,8(%0)", operands);
2800 if (n_bytes % 8 != 0)
2801 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2806 /* Pre-adjust the loop counter. */
2807 operands[4] = GEN_INT (n_bytes - 8);
2808 output_asm_insn ("ldi %4,%2", operands);
2811 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2812 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2813 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2814 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2815 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2817 /* Handle the residual. There could be up to 7 bytes of
2818 residual to copy! */
2819 if (n_bytes % 8 != 0)
2821 operands[4] = GEN_INT (n_bytes % 4);
2822 if (n_bytes % 8 >= 4)
2823 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2824 if (n_bytes % 4 != 0)
2825 output_asm_insn ("ldw 0(%1),%6", operands);
2826 if (n_bytes % 8 >= 4)
2827 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2828 if (n_bytes % 4 != 0)
2829 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2834 /* Pre-adjust the loop counter. */
2835 operands[4] = GEN_INT (n_bytes - 4);
2836 output_asm_insn ("ldi %4,%2", operands);
2839 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2840 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2841 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2842 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2843 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2845 /* Handle the residual. */
2846 if (n_bytes % 4 != 0)
2848 if (n_bytes % 4 >= 2)
2849 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2850 if (n_bytes % 2 != 0)
2851 output_asm_insn ("ldb 0(%1),%6", operands);
2852 if (n_bytes % 4 >= 2)
2853 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2854 if (n_bytes % 2 != 0)
2855 output_asm_insn ("stb %6,0(%0)", operands);
2860 /* Pre-adjust the loop counter. */
2861 operands[4] = GEN_INT (n_bytes - 2);
2862 output_asm_insn ("ldi %4,%2", operands);
2865 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2866 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2867 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2868 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2869 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2871 /* Handle the residual. */
2872 if (n_bytes % 2 != 0)
2874 output_asm_insn ("ldb 0(%1),%3", operands);
2875 output_asm_insn ("stb %3,0(%0)", operands);
2884 /* Count the number of insns necessary to handle this block move.
2886 Basic structure is the same as emit_block_move, except that we
2887 count insns rather than emit them. */
2890 compute_movmem_length (rtx insn)
2892 rtx pat = PATTERN (insn);
2893 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2894 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2895 unsigned int n_insns = 0;
2897 /* We can't move more than four bytes at a time because the PA
2898 has no longer integer move insns. (Could use fp mem ops?) */
2899 if (align > (TARGET_64BIT ? 8 : 4))
2900 align = (TARGET_64BIT ? 8 : 4);
2902 /* The basic copying loop. */
2906 if (n_bytes % (2 * align) != 0)
2908 if ((n_bytes % (2 * align)) >= align)
2911 if ((n_bytes % align) != 0)
2915 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2919 /* Emit code to perform a block clear.
2921 OPERANDS[0] is the destination pointer as a REG, clobbered.
2922 OPERANDS[1] is a register for temporary storage.
2923 OPERANDS[2] is the size as a CONST_INT
2924 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2927 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2929 int align = INTVAL (operands[3]);
2930 unsigned long n_bytes = INTVAL (operands[2]);
2932 /* We can't clear more than a word at a time because the PA
2933 has no longer integer move insns. */
2934 if (align > (TARGET_64BIT ? 8 : 4))
2935 align = (TARGET_64BIT ? 8 : 4);
2937 /* Note that we know each loop below will execute at least twice
2938 (else we would have open-coded the copy). */
2942 /* Pre-adjust the loop counter. */
2943 operands[2] = GEN_INT (n_bytes - 16);
2944 output_asm_insn ("ldi %2,%1", operands);
2947 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2948 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2949 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2951 /* Handle the residual. There could be up to 7 bytes of
2952 residual to copy! */
2953 if (n_bytes % 16 != 0)
2955 operands[2] = GEN_INT (n_bytes % 8);
2956 if (n_bytes % 16 >= 8)
2957 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2958 if (n_bytes % 8 != 0)
2959 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2964 /* Pre-adjust the loop counter. */
2965 operands[2] = GEN_INT (n_bytes - 8);
2966 output_asm_insn ("ldi %2,%1", operands);
2969 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2970 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2971 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2973 /* Handle the residual. There could be up to 7 bytes of
2974 residual to copy! */
2975 if (n_bytes % 8 != 0)
2977 operands[2] = GEN_INT (n_bytes % 4);
2978 if (n_bytes % 8 >= 4)
2979 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2980 if (n_bytes % 4 != 0)
2981 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2986 /* Pre-adjust the loop counter. */
2987 operands[2] = GEN_INT (n_bytes - 4);
2988 output_asm_insn ("ldi %2,%1", operands);
2991 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2992 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2993 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2995 /* Handle the residual. */
2996 if (n_bytes % 4 != 0)
2998 if (n_bytes % 4 >= 2)
2999 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3000 if (n_bytes % 2 != 0)
3001 output_asm_insn ("stb %%r0,0(%0)", operands);
3006 /* Pre-adjust the loop counter. */
3007 operands[2] = GEN_INT (n_bytes - 2);
3008 output_asm_insn ("ldi %2,%1", operands);
3011 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3012 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3013 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3015 /* Handle the residual. */
3016 if (n_bytes % 2 != 0)
3017 output_asm_insn ("stb %%r0,0(%0)", operands);
3026 /* Count the number of insns necessary to handle this block move.
3028 Basic structure is the same as emit_block_move, except that we
3029 count insns rather than emit them. */
3032 compute_clrmem_length (rtx insn)
3034 rtx pat = PATTERN (insn);
3035 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3036 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3037 unsigned int n_insns = 0;
3039 /* We can't clear more than a word at a time because the PA
3040 has no longer integer move insns. */
3041 if (align > (TARGET_64BIT ? 8 : 4))
3042 align = (TARGET_64BIT ? 8 : 4);
3044 /* The basic loop. */
3048 if (n_bytes % (2 * align) != 0)
3050 if ((n_bytes % (2 * align)) >= align)
3053 if ((n_bytes % align) != 0)
3057 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3063 output_and (rtx *operands)
3065 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3067 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3068 int ls0, ls1, ms0, p, len;
3070 for (ls0 = 0; ls0 < 32; ls0++)
3071 if ((mask & (1 << ls0)) == 0)
3074 for (ls1 = ls0; ls1 < 32; ls1++)
3075 if ((mask & (1 << ls1)) != 0)
3078 for (ms0 = ls1; ms0 < 32; ms0++)
3079 if ((mask & (1 << ms0)) == 0)
3082 gcc_assert (ms0 == 32);
3090 operands[2] = GEN_INT (len);
3091 return "{extru|extrw,u} %1,31,%2,%0";
3095 /* We could use this `depi' for the case above as well, but `depi'
3096 requires one more register file access than an `extru'. */
3101 operands[2] = GEN_INT (p);
3102 operands[3] = GEN_INT (len);
3103 return "{depi|depwi} 0,%2,%3,%0";
3107 return "and %1,%2,%0";
3110 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3111 storing the result in operands[0]. */
3113 output_64bit_and (rtx *operands)
3115 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3117 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3118 int ls0, ls1, ms0, p, len;
3120 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3121 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3124 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3125 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3128 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3129 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3132 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3134 if (ls1 == HOST_BITS_PER_WIDE_INT)
3140 operands[2] = GEN_INT (len);
3141 return "extrd,u %1,63,%2,%0";
3145 /* We could use this `depi' for the case above as well, but `depi'
3146 requires one more register file access than an `extru'. */
3151 operands[2] = GEN_INT (p);
3152 operands[3] = GEN_INT (len);
3153 return "depdi 0,%2,%3,%0";
3157 return "and %1,%2,%0";
3161 output_ior (rtx *operands)
3163 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3164 int bs0, bs1, p, len;
3166 if (INTVAL (operands[2]) == 0)
3167 return "copy %1,%0";
3169 for (bs0 = 0; bs0 < 32; bs0++)
3170 if ((mask & (1 << bs0)) != 0)
3173 for (bs1 = bs0; bs1 < 32; bs1++)
3174 if ((mask & (1 << bs1)) == 0)
3177 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3182 operands[2] = GEN_INT (p);
3183 operands[3] = GEN_INT (len);
3184 return "{depi|depwi} -1,%2,%3,%0";
3187 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3188 storing the result in operands[0]. */
3190 output_64bit_ior (rtx *operands)
3192 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3193 int bs0, bs1, p, len;
3195 if (INTVAL (operands[2]) == 0)
3196 return "copy %1,%0";
3198 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3199 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3202 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3203 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3206 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3207 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3212 operands[2] = GEN_INT (p);
3213 operands[3] = GEN_INT (len);
3214 return "depdi -1,%2,%3,%0";
3217 /* Target hook for assembling integer objects. This code handles
3218 aligned SI and DI integers specially since function references
3219 must be preceded by P%. */
3222 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3224 if (size == UNITS_PER_WORD
3226 && function_label_operand (x, VOIDmode))
3228 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3229 output_addr_const (asm_out_file, x);
3230 fputc ('\n', asm_out_file);
3233 return default_assemble_integer (x, size, aligned_p);
3236 /* Output an ascii string. */
3238 output_ascii (FILE *file, const char *p, int size)
3242 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3244 /* The HP assembler can only take strings of 256 characters at one
3245 time. This is a limitation on input line length, *not* the
3246 length of the string. Sigh. Even worse, it seems that the
3247 restriction is in number of input characters (see \xnn &
3248 \whatever). So we have to do this very carefully. */
3250 fputs ("\t.STRING \"", file);
3253 for (i = 0; i < size; i += 4)
3257 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3259 register unsigned int c = (unsigned char) p[i + io];
3261 if (c == '\"' || c == '\\')
3262 partial_output[co++] = '\\';
3263 if (c >= ' ' && c < 0177)
3264 partial_output[co++] = c;
3268 partial_output[co++] = '\\';
3269 partial_output[co++] = 'x';
3270 hexd = c / 16 - 0 + '0';
3272 hexd -= '9' - 'a' + 1;
3273 partial_output[co++] = hexd;
3274 hexd = c % 16 - 0 + '0';
3276 hexd -= '9' - 'a' + 1;
3277 partial_output[co++] = hexd;
3280 if (chars_output + co > 243)
3282 fputs ("\"\n\t.STRING \"", file);
3285 fwrite (partial_output, 1, (size_t) co, file);
3289 fputs ("\"\n", file);
3292 /* Try to rewrite floating point comparisons & branches to avoid
3293 useless add,tr insns.
3295 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3296 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3297 first attempt to remove useless add,tr insns. It is zero
3298 for the second pass as reorg sometimes leaves bogus REG_DEAD
3301 When CHECK_NOTES is zero we can only eliminate add,tr insns
3302 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3305 remove_useless_addtr_insns (int check_notes)
3308 static int pass = 0;
3310 /* This is fairly cheap, so always run it when optimizing. */
3314 int fbranch_count = 0;
3316 /* Walk all the insns in this function looking for fcmp & fbranch
3317 instructions. Keep track of how many of each we find. */
3318 for (insn = get_insns (); insn; insn = next_insn (insn))
3322 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3323 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3326 tmp = PATTERN (insn);
3328 /* It must be a set. */
3329 if (GET_CODE (tmp) != SET)
3332 /* If the destination is CCFP, then we've found an fcmp insn. */
3333 tmp = SET_DEST (tmp);
3334 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3340 tmp = PATTERN (insn);
3341 /* If this is an fbranch instruction, bump the fbranch counter. */
3342 if (GET_CODE (tmp) == SET
3343 && SET_DEST (tmp) == pc_rtx
3344 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3345 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3346 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3347 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3355 /* Find all floating point compare + branch insns. If possible,
3356 reverse the comparison & the branch to avoid add,tr insns. */
3357 for (insn = get_insns (); insn; insn = next_insn (insn))
3361 /* Ignore anything that isn't an INSN. */
3362 if (GET_CODE (insn) != INSN)
3365 tmp = PATTERN (insn);
3367 /* It must be a set. */
3368 if (GET_CODE (tmp) != SET)
3371 /* The destination must be CCFP, which is register zero. */
3372 tmp = SET_DEST (tmp);
3373 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3376 /* INSN should be a set of CCFP.
3378 See if the result of this insn is used in a reversed FP
3379 conditional branch. If so, reverse our condition and
3380 the branch. Doing so avoids useless add,tr insns. */
3381 next = next_insn (insn);
3384 /* Jumps, calls and labels stop our search. */
3385 if (GET_CODE (next) == JUMP_INSN
3386 || GET_CODE (next) == CALL_INSN
3387 || GET_CODE (next) == CODE_LABEL)
3390 /* As does another fcmp insn. */
3391 if (GET_CODE (next) == INSN
3392 && GET_CODE (PATTERN (next)) == SET
3393 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3394 && REGNO (SET_DEST (PATTERN (next))) == 0)
3397 next = next_insn (next);
3400 /* Is NEXT_INSN a branch? */
3402 && GET_CODE (next) == JUMP_INSN)
3404 rtx pattern = PATTERN (next);
3406 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3407 and CCFP dies, then reverse our conditional and the branch
3408 to avoid the add,tr. */
3409 if (GET_CODE (pattern) == SET
3410 && SET_DEST (pattern) == pc_rtx
3411 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3412 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3413 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3414 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3415 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3416 && (fcmp_count == fbranch_count
3418 && find_regno_note (next, REG_DEAD, 0))))
3420 /* Reverse the branch. */
3421 tmp = XEXP (SET_SRC (pattern), 1);
3422 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3423 XEXP (SET_SRC (pattern), 2) = tmp;
3424 INSN_CODE (next) = -1;
3426 /* Reverse our condition. */
3427 tmp = PATTERN (insn);
3428 PUT_CODE (XEXP (tmp, 1),
3429 (reverse_condition_maybe_unordered
3430 (GET_CODE (XEXP (tmp, 1)))));
3440 /* You may have trouble believing this, but this is the 32 bit HP-PA
3445 Variable arguments (optional; any number may be allocated)
3447 SP-(4*(N+9)) arg word N
3452 Fixed arguments (must be allocated; may remain unused)
3461 SP-32 External Data Pointer (DP)
3463 SP-24 External/stub RP (RP')
3467 SP-8 Calling Stub RP (RP'')
3472 SP-0 Stack Pointer (points to next available address)
3476 /* This function saves registers as follows. Registers marked with ' are
3477 this function's registers (as opposed to the previous function's).
3478 If a frame_pointer isn't needed, r4 is saved as a general register;
3479 the space for the frame pointer is still allocated, though, to keep
3485 SP (FP') Previous FP
3486 SP + 4 Alignment filler (sigh)
3487 SP + 8 Space for locals reserved here.
3491 SP + n All call saved register used.
3495 SP + o All call saved fp registers used.
3499 SP + p (SP') points to next available address.
3503 /* Global variables set by output_function_prologue(). */
3504 /* Size of frame. Need to know this to emit return insns from
3506 static HOST_WIDE_INT actual_fsize, local_fsize;
3507 static int save_fregs;
3509 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3510 Handle case where DISP > 8k by using the add_high_const patterns.
3512 Note in DISP > 8k case, we will leave the high part of the address
3513 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3516 store_reg (int reg, HOST_WIDE_INT disp, int base)
3518 rtx insn, dest, src, basereg;
3520 src = gen_rtx_REG (word_mode, reg);
3521 basereg = gen_rtx_REG (Pmode, base);
3522 if (VAL_14_BITS_P (disp))
3524 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3525 insn = emit_move_insn (dest, src);
3527 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3529 rtx delta = GEN_INT (disp);
3530 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3532 emit_move_insn (tmpreg, delta);
3533 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3536 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3537 gen_rtx_SET (VOIDmode, tmpreg,
3538 gen_rtx_PLUS (Pmode, basereg, delta)));
3539 RTX_FRAME_RELATED_P (insn) = 1;
3541 dest = gen_rtx_MEM (word_mode, tmpreg);
3542 insn = emit_move_insn (dest, src);
3546 rtx delta = GEN_INT (disp);
3547 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3548 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3550 emit_move_insn (tmpreg, high);
3551 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3552 insn = emit_move_insn (dest, src);
3554 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3555 gen_rtx_SET (VOIDmode,
3556 gen_rtx_MEM (word_mode,
3557 gen_rtx_PLUS (word_mode,
3564 RTX_FRAME_RELATED_P (insn) = 1;
3567 /* Emit RTL to store REG at the memory location specified by BASE and then
3568 add MOD to BASE. MOD must be <= 8k. */
3571 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3573 rtx insn, basereg, srcreg, delta;
3575 gcc_assert (VAL_14_BITS_P (mod));
3577 basereg = gen_rtx_REG (Pmode, base);
3578 srcreg = gen_rtx_REG (word_mode, reg);
3579 delta = GEN_INT (mod);
3581 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3584 RTX_FRAME_RELATED_P (insn) = 1;
3586 /* RTX_FRAME_RELATED_P must be set on each frame related set
3587 in a parallel with more than one element. */
3588 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3589 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3593 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3594 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3595 whether to add a frame note or not.
3597 In the DISP > 8k case, we leave the high part of the address in %r1.
3598 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3601 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3605 if (VAL_14_BITS_P (disp))
3607 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3608 plus_constant (gen_rtx_REG (Pmode, base), disp));
3610 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3612 rtx basereg = gen_rtx_REG (Pmode, base);
3613 rtx delta = GEN_INT (disp);
3614 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3616 emit_move_insn (tmpreg, delta);
3617 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3618 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3620 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3621 gen_rtx_SET (VOIDmode, tmpreg,
3622 gen_rtx_PLUS (Pmode, basereg, delta)));
3626 rtx basereg = gen_rtx_REG (Pmode, base);
3627 rtx delta = GEN_INT (disp);
3628 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3630 emit_move_insn (tmpreg,
3631 gen_rtx_PLUS (Pmode, basereg,
3632 gen_rtx_HIGH (Pmode, delta)));
3633 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3634 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3637 if (DO_FRAME_NOTES && note)
3638 RTX_FRAME_RELATED_P (insn) = 1;
3642 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3647 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3648 be consistent with the rounding and size calculation done here.
3649 Change them at the same time. */
3651 /* We do our own stack alignment. First, round the size of the
3652 stack locals up to a word boundary. */
3653 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3655 /* Space for previous frame pointer + filler. If any frame is
3656 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3657 waste some space here for the sake of HP compatibility. The
3658 first slot is only used when the frame pointer is needed. */
3659 if (size || frame_pointer_needed)
3660 size += STARTING_FRAME_OFFSET;
3662 /* If the current function calls __builtin_eh_return, then we need
3663 to allocate stack space for registers that will hold data for
3664 the exception handler. */
3665 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3669 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3671 size += i * UNITS_PER_WORD;
3674 /* Account for space used by the callee general register saves. */
3675 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3676 if (df_regs_ever_live_p (i))
3677 size += UNITS_PER_WORD;
3679 /* Account for space used by the callee floating point register saves. */
3680 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3681 if (df_regs_ever_live_p (i)
3682 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3686 /* We always save both halves of the FP register, so always
3687 increment the frame size by 8 bytes. */
3691 /* If any of the floating registers are saved, account for the
3692 alignment needed for the floating point register save block. */
3695 size = (size + 7) & ~7;
3700 /* The various ABIs include space for the outgoing parameters in the
3701 size of the current function's stack frame. We don't need to align
3702 for the outgoing arguments as their alignment is set by the final
3703 rounding for the frame as a whole. */
3704 size += crtl->outgoing_args_size;
3706 /* Allocate space for the fixed frame marker. This space must be
3707 allocated for any function that makes calls or allocates
3709 if (!current_function_is_leaf || size)
3710 size += TARGET_64BIT ? 48 : 32;
3712 /* Finally, round to the preferred stack boundary. */
3713 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3714 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3717 /* Generate the assembly code for function entry. FILE is a stdio
3718 stream to output the code to. SIZE is an int: how many units of
3719 temporary storage to allocate.
3721 Refer to the array `regs_ever_live' to determine which registers to
3722 save; `regs_ever_live[I]' is nonzero if register number I is ever
3723 used in the function. This function is responsible for knowing
3724 which registers should not be saved even if used. */
3726 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3727 of memory. If any fpu reg is used in the function, we allocate
3728 such a block here, at the bottom of the frame, just in case it's needed.
3730 If this function is a leaf procedure, then we may choose not
3731 to do a "save" insn. The decision about whether or not
3732 to do this is made in regclass.c. */
3735 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3737 /* The function's label and associated .PROC must never be
3738 separated and must be output *after* any profiling declarations
3739 to avoid changing spaces/subspaces within a procedure. */
3740 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3741 fputs ("\t.PROC\n", file);
3743 /* hppa_expand_prologue does the dirty work now. We just need
3744 to output the assembler directives which denote the start
3746 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3747 if (current_function_is_leaf)
3748 fputs (",NO_CALLS", file);
3750 fputs (",CALLS", file);
3752 fputs (",SAVE_RP", file);
3754 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3755 at the beginning of the frame and that it is used as the frame
3756 pointer for the frame. We do this because our current frame
3757 layout doesn't conform to that specified in the HP runtime
3758 documentation and we need a way to indicate to programs such as
3759 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3760 isn't used by HP compilers but is supported by the assembler.
3761 However, SAVE_SP is supposed to indicate that the previous stack
3762 pointer has been saved in the frame marker. */
3763 if (frame_pointer_needed)
3764 fputs (",SAVE_SP", file);
3766 /* Pass on information about the number of callee register saves
3767 performed in the prologue.
3769 The compiler is supposed to pass the highest register number
3770 saved, the assembler then has to adjust that number before
3771 entering it into the unwind descriptor (to account for any
3772 caller saved registers with lower register numbers than the
3773 first callee saved register). */
3775 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3778 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3780 fputs ("\n\t.ENTRY\n", file);
3782 remove_useless_addtr_insns (0);
3786 hppa_expand_prologue (void)
3788 int merge_sp_adjust_with_store = 0;
3789 HOST_WIDE_INT size = get_frame_size ();
3790 HOST_WIDE_INT offset;
3798 /* Compute total size for frame pointer, filler, locals and rounding to
3799 the next word boundary. Similar code appears in compute_frame_size
3800 and must be changed in tandem with this code. */
3801 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3802 if (local_fsize || frame_pointer_needed)
3803 local_fsize += STARTING_FRAME_OFFSET;
3805 actual_fsize = compute_frame_size (size, &save_fregs);
3806 if (flag_stack_usage)
3807 current_function_static_stack_size = actual_fsize;
3809 /* Compute a few things we will use often. */
3810 tmpreg = gen_rtx_REG (word_mode, 1);
3812 /* Save RP first. The calling conventions manual states RP will
3813 always be stored into the caller's frame at sp - 20 or sp - 16
3814 depending on which ABI is in use. */
3815 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3817 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3823 /* Allocate the local frame and set up the frame pointer if needed. */
3824 if (actual_fsize != 0)
3826 if (frame_pointer_needed)
3828 /* Copy the old frame pointer temporarily into %r1. Set up the
3829 new stack pointer, then store away the saved old frame pointer
3830 into the stack at sp and at the same time update the stack
3831 pointer by actual_fsize bytes. Two versions, first
3832 handles small (<8k) frames. The second handles large (>=8k)
3834 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3836 RTX_FRAME_RELATED_P (insn) = 1;
3838 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3840 RTX_FRAME_RELATED_P (insn) = 1;
3842 if (VAL_14_BITS_P (actual_fsize))
3843 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3846 /* It is incorrect to store the saved frame pointer at *sp,
3847 then increment sp (writes beyond the current stack boundary).
3849 So instead use stwm to store at *sp and post-increment the
3850 stack pointer as an atomic operation. Then increment sp to
3851 finish allocating the new frame. */
3852 HOST_WIDE_INT adjust1 = 8192 - 64;
3853 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3855 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3856 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3860 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3861 we need to store the previous stack pointer (frame pointer)
3862 into the frame marker on targets that use the HP unwind
3863 library. This allows the HP unwind library to be used to
3864 unwind GCC frames. However, we are not fully compatible
3865 with the HP library because our frame layout differs from
3866 that specified in the HP runtime specification.
3868 We don't want a frame note on this instruction as the frame
3869 marker moves during dynamic stack allocation.
3871 This instruction also serves as a blockage to prevent
3872 register spills from being scheduled before the stack
3873 pointer is raised. This is necessary as we store
3874 registers using the frame pointer as a base register,
3875 and the frame pointer is set before sp is raised. */
3876 if (TARGET_HPUX_UNWIND_LIBRARY)
3878 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3879 GEN_INT (TARGET_64BIT ? -8 : -4));
3881 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3882 hard_frame_pointer_rtx);
3885 emit_insn (gen_blockage ());
3887 /* no frame pointer needed. */
3890 /* In some cases we can perform the first callee register save
3891 and allocating the stack frame at the same time. If so, just
3892 make a note of it and defer allocating the frame until saving
3893 the callee registers. */
3894 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3895 merge_sp_adjust_with_store = 1;
3896 /* Can not optimize. Adjust the stack frame by actual_fsize
3899 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3904 /* Normal register save.
3906 Do not save the frame pointer in the frame_pointer_needed case. It
3907 was done earlier. */
3908 if (frame_pointer_needed)
3910 offset = local_fsize;
3912 /* Saving the EH return data registers in the frame is the simplest
3913 way to get the frame unwind information emitted. We put them
3914 just before the general registers. */
3915 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3917 unsigned int i, regno;
3921 regno = EH_RETURN_DATA_REGNO (i);
3922 if (regno == INVALID_REGNUM)
3925 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3926 offset += UNITS_PER_WORD;
3930 for (i = 18; i >= 4; i--)
3931 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3933 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3934 offset += UNITS_PER_WORD;
3937 /* Account for %r3 which is saved in a special place. */
3940 /* No frame pointer needed. */
3943 offset = local_fsize - actual_fsize;
3945 /* Saving the EH return data registers in the frame is the simplest
3946 way to get the frame unwind information emitted. */
3947 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3949 unsigned int i, regno;
3953 regno = EH_RETURN_DATA_REGNO (i);
3954 if (regno == INVALID_REGNUM)
3957 /* If merge_sp_adjust_with_store is nonzero, then we can
3958 optimize the first save. */
3959 if (merge_sp_adjust_with_store)
3961 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3962 merge_sp_adjust_with_store = 0;
3965 store_reg (regno, offset, STACK_POINTER_REGNUM);
3966 offset += UNITS_PER_WORD;
3970 for (i = 18; i >= 3; i--)
3971 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3973 /* If merge_sp_adjust_with_store is nonzero, then we can
3974 optimize the first GR save. */
3975 if (merge_sp_adjust_with_store)
3977 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3978 merge_sp_adjust_with_store = 0;
3981 store_reg (i, offset, STACK_POINTER_REGNUM);
3982 offset += UNITS_PER_WORD;
3986 /* If we wanted to merge the SP adjustment with a GR save, but we never
3987 did any GR saves, then just emit the adjustment here. */
3988 if (merge_sp_adjust_with_store)
3989 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3993 /* The hppa calling conventions say that %r19, the pic offset
3994 register, is saved at sp - 32 (in this function's frame)
3995 when generating PIC code. FIXME: What is the correct thing
3996 to do for functions which make no calls and allocate no
3997 frame? Do we need to allocate a frame, or can we just omit
3998 the save? For now we'll just omit the save.
4000 We don't want a note on this insn as the frame marker can
4001 move if there is a dynamic stack allocation. */
4002 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4004 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4006 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4010 /* Align pointer properly (doubleword boundary). */
4011 offset = (offset + 7) & ~7;
4013 /* Floating point register store. */
4018 /* First get the frame or stack pointer to the start of the FP register
4020 if (frame_pointer_needed)
4022 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4023 base = hard_frame_pointer_rtx;
4027 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4028 base = stack_pointer_rtx;
4031 /* Now actually save the FP registers. */
4032 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4034 if (df_regs_ever_live_p (i)
4035 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4037 rtx addr, insn, reg;
4038 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4039 reg = gen_rtx_REG (DFmode, i);
4040 insn = emit_move_insn (addr, reg);
4043 RTX_FRAME_RELATED_P (insn) = 1;
4046 rtx mem = gen_rtx_MEM (DFmode,
4047 plus_constant (base, offset));
4048 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4049 gen_rtx_SET (VOIDmode, mem, reg));
4053 rtx meml = gen_rtx_MEM (SFmode,
4054 plus_constant (base, offset));
4055 rtx memr = gen_rtx_MEM (SFmode,
4056 plus_constant (base, offset + 4));
4057 rtx regl = gen_rtx_REG (SFmode, i);
4058 rtx regr = gen_rtx_REG (SFmode, i + 1);
4059 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4060 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4063 RTX_FRAME_RELATED_P (setl) = 1;
4064 RTX_FRAME_RELATED_P (setr) = 1;
4065 vec = gen_rtvec (2, setl, setr);
4066 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4067 gen_rtx_SEQUENCE (VOIDmode, vec));
4070 offset += GET_MODE_SIZE (DFmode);
4077 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4078 Handle case where DISP > 8k by using the add_high_const patterns. */
4081 load_reg (int reg, HOST_WIDE_INT disp, int base)
4083 rtx dest = gen_rtx_REG (word_mode, reg);
4084 rtx basereg = gen_rtx_REG (Pmode, base);
4087 if (VAL_14_BITS_P (disp))
4088 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4089 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4091 rtx delta = GEN_INT (disp);
4092 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4094 emit_move_insn (tmpreg, delta);
4095 if (TARGET_DISABLE_INDEXING)
4097 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4098 src = gen_rtx_MEM (word_mode, tmpreg);
4101 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4105 rtx delta = GEN_INT (disp);
4106 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4107 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4109 emit_move_insn (tmpreg, high);
4110 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4113 emit_move_insn (dest, src);
4116 /* Update the total code bytes output to the text section. */
4119 update_total_code_bytes (unsigned int nbytes)
4121 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4122 && !IN_NAMED_SECTION_P (cfun->decl))
4124 unsigned int old_total = total_code_bytes;
4126 total_code_bytes += nbytes;
4128 /* Be prepared to handle overflows. */
4129 if (old_total > total_code_bytes)
4130 total_code_bytes = UINT_MAX;
4134 /* This function generates the assembly code for function exit.
4135 Args are as for output_function_prologue ().
4137 The function epilogue should not depend on the current stack
4138 pointer! It should use the frame pointer only. This is mandatory
4139 because of alloca; we also take advantage of it to omit stack
4140 adjustments before returning. */
4143 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4145 rtx insn = get_last_insn ();
4149 /* hppa_expand_epilogue does the dirty work now. We just need
4150 to output the assembler directives which denote the end
4153 To make debuggers happy, emit a nop if the epilogue was completely
4154 eliminated due to a volatile call as the last insn in the
4155 current function. That way the return address (in %r2) will
4156 always point to a valid instruction in the current function. */
4158 /* Get the last real insn. */
4159 if (GET_CODE (insn) == NOTE)
4160 insn = prev_real_insn (insn);
4162 /* If it is a sequence, then look inside. */
4163 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4164 insn = XVECEXP (PATTERN (insn), 0, 0);
4166 /* If insn is a CALL_INSN, then it must be a call to a volatile
4167 function (otherwise there would be epilogue insns). */
4168 if (insn && GET_CODE (insn) == CALL_INSN)
4170 fputs ("\tnop\n", file);
4174 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4176 if (TARGET_SOM && TARGET_GAS)
4178 /* We done with this subspace except possibly for some additional
4179 debug information. Forget that we are in this subspace to ensure
4180 that the next function is output in its own subspace. */
4182 cfun->machine->in_nsubspa = 2;
4185 if (INSN_ADDRESSES_SET_P ())
4187 insn = get_last_nonnote_insn ();
4188 last_address += INSN_ADDRESSES (INSN_UID (insn));
4190 last_address += insn_default_length (insn);
4191 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4192 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4195 last_address = UINT_MAX;
4197 /* Finally, update the total number of code bytes output so far. */
4198 update_total_code_bytes (last_address);
4202 hppa_expand_epilogue (void)
4205 HOST_WIDE_INT offset;
4206 HOST_WIDE_INT ret_off = 0;
4208 int merge_sp_adjust_with_load = 0;
4210 /* We will use this often. */
4211 tmpreg = gen_rtx_REG (word_mode, 1);
4213 /* Try to restore RP early to avoid load/use interlocks when
4214 RP gets used in the return (bv) instruction. This appears to still
4215 be necessary even when we schedule the prologue and epilogue. */
4218 ret_off = TARGET_64BIT ? -16 : -20;
4219 if (frame_pointer_needed)
4221 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4226 /* No frame pointer, and stack is smaller than 8k. */
4227 if (VAL_14_BITS_P (ret_off - actual_fsize))
4229 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4235 /* General register restores. */
4236 if (frame_pointer_needed)
4238 offset = local_fsize;
4240 /* If the current function calls __builtin_eh_return, then we need
4241 to restore the saved EH data registers. */
4242 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4244 unsigned int i, regno;
4248 regno = EH_RETURN_DATA_REGNO (i);
4249 if (regno == INVALID_REGNUM)
4252 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4253 offset += UNITS_PER_WORD;
4257 for (i = 18; i >= 4; i--)
4258 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4260 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4261 offset += UNITS_PER_WORD;
4266 offset = local_fsize - actual_fsize;
4268 /* If the current function calls __builtin_eh_return, then we need
4269 to restore the saved EH data registers. */
4270 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4272 unsigned int i, regno;
4276 regno = EH_RETURN_DATA_REGNO (i);
4277 if (regno == INVALID_REGNUM)
4280 /* Only for the first load.
4281 merge_sp_adjust_with_load holds the register load
4282 with which we will merge the sp adjustment. */
4283 if (merge_sp_adjust_with_load == 0
4285 && VAL_14_BITS_P (-actual_fsize))
4286 merge_sp_adjust_with_load = regno;
4288 load_reg (regno, offset, STACK_POINTER_REGNUM);
4289 offset += UNITS_PER_WORD;
4293 for (i = 18; i >= 3; i--)
4295 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4297 /* Only for the first load.
4298 merge_sp_adjust_with_load holds the register load
4299 with which we will merge the sp adjustment. */
4300 if (merge_sp_adjust_with_load == 0
4302 && VAL_14_BITS_P (-actual_fsize))
4303 merge_sp_adjust_with_load = i;
4305 load_reg (i, offset, STACK_POINTER_REGNUM);
4306 offset += UNITS_PER_WORD;
4311 /* Align pointer properly (doubleword boundary). */
4312 offset = (offset + 7) & ~7;
4314 /* FP register restores. */
4317 /* Adjust the register to index off of. */
4318 if (frame_pointer_needed)
4319 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4321 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4323 /* Actually do the restores now. */
4324 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4325 if (df_regs_ever_live_p (i)
4326 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4328 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4329 rtx dest = gen_rtx_REG (DFmode, i);
4330 emit_move_insn (dest, src);
4334 /* Emit a blockage insn here to keep these insns from being moved to
4335 an earlier spot in the epilogue, or into the main instruction stream.
4337 This is necessary as we must not cut the stack back before all the
4338 restores are finished. */
4339 emit_insn (gen_blockage ());
4341 /* Reset stack pointer (and possibly frame pointer). The stack
4342 pointer is initially set to fp + 64 to avoid a race condition. */
4343 if (frame_pointer_needed)
4345 rtx delta = GEN_INT (-64);
4347 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4348 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4349 stack_pointer_rtx, delta));
4351 /* If we were deferring a callee register restore, do it now. */
4352 else if (merge_sp_adjust_with_load)
4354 rtx delta = GEN_INT (-actual_fsize);
4355 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4357 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4359 else if (actual_fsize != 0)
4360 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4363 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4364 frame greater than 8k), do so now. */
4366 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4368 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4370 rtx sa = EH_RETURN_STACKADJ_RTX;
4372 emit_insn (gen_blockage ());
4373 emit_insn (TARGET_64BIT
4374 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4375 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4380 hppa_pic_save_rtx (void)
4382 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4385 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4386 #define NO_DEFERRED_PROFILE_COUNTERS 0
4390 /* Vector of funcdef numbers. */
4391 static VEC(int,heap) *funcdef_nos;
4393 /* Output deferred profile counters. */
4395 output_deferred_profile_counters (void)
4400 if (VEC_empty (int, funcdef_nos))
4403 switch_to_section (data_section);
4404 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4405 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4407 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4409 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4410 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4413 VEC_free (int, heap, funcdef_nos);
4417 hppa_profile_hook (int label_no)
4419 /* We use SImode for the address of the function in both 32 and
4420 64-bit code to avoid having to provide DImode versions of the
4421 lcla2 and load_offset_label_address insn patterns. */
4422 rtx reg = gen_reg_rtx (SImode);
4423 rtx label_rtx = gen_label_rtx ();
4424 rtx begin_label_rtx, call_insn;
4425 char begin_label_name[16];
4427 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4429 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4432 emit_move_insn (arg_pointer_rtx,
4433 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4436 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4438 /* The address of the function is loaded into %r25 with an instruction-
4439 relative sequence that avoids the use of relocations. The sequence
4440 is split so that the load_offset_label_address instruction can
4441 occupy the delay slot of the call to _mcount. */
4443 emit_insn (gen_lcla2 (reg, label_rtx));
4445 emit_insn (gen_lcla1 (reg, label_rtx));
4447 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4448 reg, begin_label_rtx, label_rtx));
4450 #if !NO_DEFERRED_PROFILE_COUNTERS
4452 rtx count_label_rtx, addr, r24;
4453 char count_label_name[16];
4455 VEC_safe_push (int, heap, funcdef_nos, label_no);
4456 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4457 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4459 addr = force_reg (Pmode, count_label_rtx);
4460 r24 = gen_rtx_REG (Pmode, 24);
4461 emit_move_insn (r24, addr);
4464 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4465 gen_rtx_SYMBOL_REF (Pmode,
4467 GEN_INT (TARGET_64BIT ? 24 : 12)));
4469 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4474 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4475 gen_rtx_SYMBOL_REF (Pmode,
4477 GEN_INT (TARGET_64BIT ? 16 : 8)));
4481 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4482 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4484 /* Indicate the _mcount call cannot throw, nor will it execute a
4486 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4489 /* Fetch the return address for the frame COUNT steps up from
4490 the current frame, after the prologue. FRAMEADDR is the
4491 frame pointer of the COUNT frame.
4493 We want to ignore any export stub remnants here. To handle this,
4494 we examine the code at the return address, and if it is an export
4495 stub, we return a memory rtx for the stub return address stored
4498 The value returned is used in two different ways:
4500 1. To find a function's caller.
4502 2. To change the return address for a function.
4504 This function handles most instances of case 1; however, it will
4505 fail if there are two levels of stubs to execute on the return
4506 path. The only way I believe that can happen is if the return value
4507 needs a parameter relocation, which never happens for C code.
4509 This function handles most instances of case 2; however, it will
4510 fail if we did not originally have stub code on the return path
4511 but will need stub code on the new return path. This can happen if
4512 the caller & callee are both in the main program, but the new
4513 return location is in a shared library. */
4516 return_addr_rtx (int count, rtx frameaddr)
4523 /* Instruction stream at the normal return address for the export stub:
4525 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4526 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4527 0x00011820 | stub+16: mtsp r1,sr0
4528 0xe0400002 | stub+20: be,n 0(sr0,rp)
4530 0xe0400002 must be specified as -532676606 so that it won't be
4531 rejected as an invalid immediate operand on 64-bit hosts. */
4533 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4539 rp = get_hard_reg_initial_val (Pmode, 2);
4541 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4544 /* If there is no export stub then just use the value saved from
4545 the return pointer register. */
4547 saved_rp = gen_reg_rtx (Pmode);
4548 emit_move_insn (saved_rp, rp);
4550 /* Get pointer to the instruction stream. We have to mask out the
4551 privilege level from the two low order bits of the return address
4552 pointer here so that ins will point to the start of the first
4553 instruction that would have been executed if we returned. */
4554 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4555 label = gen_label_rtx ();
4557 /* Check the instruction stream at the normal return address for the
4558 export stub. If it is an export stub, than our return address is
4559 really in -24[frameaddr]. */
4561 for (i = 0; i < 3; i++)
4563 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4564 rtx op1 = GEN_INT (insns[i]);
4565 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4568 /* Here we know that our return address points to an export
4569 stub. We don't want to return the address of the export stub,
4570 but rather the return address of the export stub. That return
4571 address is stored at -24[frameaddr]. */
4573 emit_move_insn (saved_rp,
4575 memory_address (Pmode,
4576 plus_constant (frameaddr,
4585 emit_bcond_fp (rtx operands[])
4587 enum rtx_code code = GET_CODE (operands[0]);
4588 rtx operand0 = operands[1];
4589 rtx operand1 = operands[2];
4590 rtx label = operands[3];
4592 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4593 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4595 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4596 gen_rtx_IF_THEN_ELSE (VOIDmode,
4599 gen_rtx_REG (CCFPmode, 0),
4601 gen_rtx_LABEL_REF (VOIDmode, label),
4606 /* Adjust the cost of a scheduling dependency. Return the new cost of
4607 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4610 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4612 enum attr_type attr_type;
4614 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4615 true dependencies as they are described with bypasses now. */
4616 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4619 if (! recog_memoized (insn))
4622 attr_type = get_attr_type (insn);
4624 switch (REG_NOTE_KIND (link))
4627 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4630 if (attr_type == TYPE_FPLOAD)
4632 rtx pat = PATTERN (insn);
4633 rtx dep_pat = PATTERN (dep_insn);
4634 if (GET_CODE (pat) == PARALLEL)
4636 /* This happens for the fldXs,mb patterns. */
4637 pat = XVECEXP (pat, 0, 0);
4639 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4640 /* If this happens, we have to extend this to schedule
4641 optimally. Return 0 for now. */
4644 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4646 if (! recog_memoized (dep_insn))
4648 switch (get_attr_type (dep_insn))
4655 case TYPE_FPSQRTSGL:
4656 case TYPE_FPSQRTDBL:
4657 /* A fpload can't be issued until one cycle before a
4658 preceding arithmetic operation has finished if
4659 the target of the fpload is any of the sources
4660 (or destination) of the arithmetic operation. */
4661 return insn_default_latency (dep_insn) - 1;
4668 else if (attr_type == TYPE_FPALU)
4670 rtx pat = PATTERN (insn);
4671 rtx dep_pat = PATTERN (dep_insn);
4672 if (GET_CODE (pat) == PARALLEL)
4674 /* This happens for the fldXs,mb patterns. */
4675 pat = XVECEXP (pat, 0, 0);
4677 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4678 /* If this happens, we have to extend this to schedule
4679 optimally. Return 0 for now. */
4682 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4684 if (! recog_memoized (dep_insn))
4686 switch (get_attr_type (dep_insn))
4690 case TYPE_FPSQRTSGL:
4691 case TYPE_FPSQRTDBL:
4692 /* An ALU flop can't be issued until two cycles before a
4693 preceding divide or sqrt operation has finished if
4694 the target of the ALU flop is any of the sources
4695 (or destination) of the divide or sqrt operation. */
4696 return insn_default_latency (dep_insn) - 2;
4704 /* For other anti dependencies, the cost is 0. */
4707 case REG_DEP_OUTPUT:
4708 /* Output dependency; DEP_INSN writes a register that INSN writes some
4710 if (attr_type == TYPE_FPLOAD)
4712 rtx pat = PATTERN (insn);
4713 rtx dep_pat = PATTERN (dep_insn);
4714 if (GET_CODE (pat) == PARALLEL)
4716 /* This happens for the fldXs,mb patterns. */
4717 pat = XVECEXP (pat, 0, 0);
4719 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4720 /* If this happens, we have to extend this to schedule
4721 optimally. Return 0 for now. */
4724 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4726 if (! recog_memoized (dep_insn))
4728 switch (get_attr_type (dep_insn))
4735 case TYPE_FPSQRTSGL:
4736 case TYPE_FPSQRTDBL:
4737 /* A fpload can't be issued until one cycle before a
4738 preceding arithmetic operation has finished if
4739 the target of the fpload is the destination of the
4740 arithmetic operation.
4742 Exception: For PA7100LC, PA7200 and PA7300, the cost
4743 is 3 cycles, unless they bundle together. We also
4744 pay the penalty if the second insn is a fpload. */
4745 return insn_default_latency (dep_insn) - 1;
4752 else if (attr_type == TYPE_FPALU)
4754 rtx pat = PATTERN (insn);
4755 rtx dep_pat = PATTERN (dep_insn);
4756 if (GET_CODE (pat) == PARALLEL)
4758 /* This happens for the fldXs,mb patterns. */
4759 pat = XVECEXP (pat, 0, 0);
4761 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4762 /* If this happens, we have to extend this to schedule
4763 optimally. Return 0 for now. */
4766 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4768 if (! recog_memoized (dep_insn))
4770 switch (get_attr_type (dep_insn))
4774 case TYPE_FPSQRTSGL:
4775 case TYPE_FPSQRTDBL:
4776 /* An ALU flop can't be issued until two cycles before a
4777 preceding divide or sqrt operation has finished if
4778 the target of the ALU flop is also the target of
4779 the divide or sqrt operation. */
4780 return insn_default_latency (dep_insn) - 2;
4788 /* For other output dependencies, the cost is 0. */
4796 /* Adjust scheduling priorities. We use this to try and keep addil
4797 and the next use of %r1 close together. */
4799 pa_adjust_priority (rtx insn, int priority)
4801 rtx set = single_set (insn);
4805 src = SET_SRC (set);
4806 dest = SET_DEST (set);
4807 if (GET_CODE (src) == LO_SUM
4808 && symbolic_operand (XEXP (src, 1), VOIDmode)
4809 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4812 else if (GET_CODE (src) == MEM
4813 && GET_CODE (XEXP (src, 0)) == LO_SUM
4814 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4815 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4818 else if (GET_CODE (dest) == MEM
4819 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4820 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4821 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4827 /* The 700 can only issue a single insn at a time.
4828 The 7XXX processors can issue two insns at a time.
4829 The 8000 can issue 4 insns at a time. */
4831 pa_issue_rate (void)
4835 case PROCESSOR_700: return 1;
4836 case PROCESSOR_7100: return 2;
4837 case PROCESSOR_7100LC: return 2;
4838 case PROCESSOR_7200: return 2;
4839 case PROCESSOR_7300: return 2;
4840 case PROCESSOR_8000: return 4;
4849 /* Return any length adjustment needed by INSN which already has its length
4850 computed as LENGTH. Return zero if no adjustment is necessary.
4852 For the PA: function calls, millicode calls, and backwards short
4853 conditional branches with unfilled delay slots need an adjustment by +1
4854 (to account for the NOP which will be inserted into the instruction stream).
4856 Also compute the length of an inline block move here as it is too
4857 complicated to express as a length attribute in pa.md. */
4859 pa_adjust_insn_length (rtx insn, int length)
4861 rtx pat = PATTERN (insn);
4863 /* Jumps inside switch tables which have unfilled delay slots need
4865 if (GET_CODE (insn) == JUMP_INSN
4866 && GET_CODE (pat) == PARALLEL
4867 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4869 /* Millicode insn with an unfilled delay slot. */
4870 else if (GET_CODE (insn) == INSN
4871 && GET_CODE (pat) != SEQUENCE
4872 && GET_CODE (pat) != USE
4873 && GET_CODE (pat) != CLOBBER
4874 && get_attr_type (insn) == TYPE_MILLI)
4876 /* Block move pattern. */
4877 else if (GET_CODE (insn) == INSN
4878 && GET_CODE (pat) == PARALLEL
4879 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4880 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4881 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4882 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4883 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4884 return compute_movmem_length (insn) - 4;
4885 /* Block clear pattern. */
4886 else if (GET_CODE (insn) == INSN
4887 && GET_CODE (pat) == PARALLEL
4888 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4889 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4890 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4891 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4892 return compute_clrmem_length (insn) - 4;
4893 /* Conditional branch with an unfilled delay slot. */
4894 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4896 /* Adjust a short backwards conditional with an unfilled delay slot. */
4897 if (GET_CODE (pat) == SET
4899 && JUMP_LABEL (insn) != NULL_RTX
4900 && ! forward_branch_p (insn))
4902 else if (GET_CODE (pat) == PARALLEL
4903 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4906 /* Adjust dbra insn with short backwards conditional branch with
4907 unfilled delay slot -- only for case where counter is in a
4908 general register register. */
4909 else if (GET_CODE (pat) == PARALLEL
4910 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4911 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4912 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4914 && ! forward_branch_p (insn))
4922 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4925 pa_print_operand_punct_valid_p (unsigned char code)
4936 /* Print operand X (an rtx) in assembler syntax to file FILE.
4937 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4938 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4941 print_operand (FILE *file, rtx x, int code)
4946 /* Output a 'nop' if there's nothing for the delay slot. */
4947 if (dbr_sequence_length () == 0)
4948 fputs ("\n\tnop", file);
4951 /* Output a nullification completer if there's nothing for the */
4952 /* delay slot or nullification is requested. */
4953 if (dbr_sequence_length () == 0 ||
4955 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4959 /* Print out the second register name of a register pair.
4960 I.e., R (6) => 7. */
4961 fputs (reg_names[REGNO (x) + 1], file);
4964 /* A register or zero. */
4966 || (x == CONST0_RTX (DFmode))
4967 || (x == CONST0_RTX (SFmode)))
4969 fputs ("%r0", file);
4975 /* A register or zero (floating point). */
4977 || (x == CONST0_RTX (DFmode))
4978 || (x == CONST0_RTX (SFmode)))
4980 fputs ("%fr0", file);
4989 xoperands[0] = XEXP (XEXP (x, 0), 0);
4990 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4991 output_global_address (file, xoperands[1], 0);
4992 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4996 case 'C': /* Plain (C)ondition */
4998 switch (GET_CODE (x))
5001 fputs ("=", file); break;
5003 fputs ("<>", file); break;
5005 fputs (">", file); break;
5007 fputs (">=", file); break;
5009 fputs (">>=", file); break;
5011 fputs (">>", file); break;
5013 fputs ("<", file); break;
5015 fputs ("<=", file); break;
5017 fputs ("<<=", file); break;
5019 fputs ("<<", file); break;
5024 case 'N': /* Condition, (N)egated */
5025 switch (GET_CODE (x))
5028 fputs ("<>", file); break;
5030 fputs ("=", file); break;
5032 fputs ("<=", file); break;
5034 fputs ("<", file); break;
5036 fputs ("<<", file); break;
5038 fputs ("<<=", file); break;
5040 fputs (">=", file); break;
5042 fputs (">", file); break;
5044 fputs (">>", file); break;
5046 fputs (">>=", file); break;
5051 /* For floating point comparisons. Note that the output
5052 predicates are the complement of the desired mode. The
5053 conditions for GT, GE, LT, LE and LTGT cause an invalid
5054 operation exception if the result is unordered and this
5055 exception is enabled in the floating-point status register. */
5057 switch (GET_CODE (x))
5060 fputs ("!=", file); break;
5062 fputs ("=", file); break;
5064 fputs ("!>", file); break;
5066 fputs ("!>=", file); break;
5068 fputs ("!<", file); break;
5070 fputs ("!<=", file); break;
5072 fputs ("!<>", file); break;
5074 fputs ("!?<=", file); break;
5076 fputs ("!?<", file); break;
5078 fputs ("!?>=", file); break;
5080 fputs ("!?>", file); break;
5082 fputs ("!?=", file); break;
5084 fputs ("!?", file); break;
5086 fputs ("?", file); break;
5091 case 'S': /* Condition, operands are (S)wapped. */
5092 switch (GET_CODE (x))
5095 fputs ("=", file); break;
5097 fputs ("<>", file); break;
5099 fputs ("<", file); break;
5101 fputs ("<=", file); break;
5103 fputs ("<<=", file); break;
5105 fputs ("<<", file); break;
5107 fputs (">", file); break;
5109 fputs (">=", file); break;
5111 fputs (">>=", file); break;
5113 fputs (">>", file); break;
5118 case 'B': /* Condition, (B)oth swapped and negate. */
5119 switch (GET_CODE (x))
5122 fputs ("<>", file); break;
5124 fputs ("=", file); break;
5126 fputs (">=", file); break;
5128 fputs (">", file); break;
5130 fputs (">>", file); break;
5132 fputs (">>=", file); break;
5134 fputs ("<=", file); break;
5136 fputs ("<", file); break;
5138 fputs ("<<", file); break;
5140 fputs ("<<=", file); break;
5146 gcc_assert (GET_CODE (x) == CONST_INT);
5147 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5150 gcc_assert (GET_CODE (x) == CONST_INT);
5151 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5154 gcc_assert (GET_CODE (x) == CONST_INT);
5155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5158 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5159 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5162 gcc_assert (GET_CODE (x) == CONST_INT);
5163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5166 gcc_assert (GET_CODE (x) == CONST_INT);
5167 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5170 if (GET_CODE (x) == CONST_INT)
5175 switch (GET_CODE (XEXP (x, 0)))
5179 if (ASSEMBLER_DIALECT == 0)
5180 fputs ("s,mb", file);
5182 fputs (",mb", file);
5186 if (ASSEMBLER_DIALECT == 0)
5187 fputs ("s,ma", file);
5189 fputs (",ma", file);
5192 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5193 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5195 if (ASSEMBLER_DIALECT == 0)
5198 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5199 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5201 if (ASSEMBLER_DIALECT == 0)
5202 fputs ("x,s", file);
5206 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5210 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5216 output_global_address (file, x, 0);
5219 output_global_address (file, x, 1);
5221 case 0: /* Don't do anything special */
5226 compute_zdepwi_operands (INTVAL (x), op);
5227 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5233 compute_zdepdi_operands (INTVAL (x), op);
5234 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5238 /* We can get here from a .vtable_inherit due to our
5239 CONSTANT_ADDRESS_P rejecting perfectly good constant
5245 if (GET_CODE (x) == REG)
5247 fputs (reg_names [REGNO (x)], file);
5248 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5254 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5255 && (REGNO (x) & 1) == 0)
5258 else if (GET_CODE (x) == MEM)
5260 int size = GET_MODE_SIZE (GET_MODE (x));
5261 rtx base = NULL_RTX;
5262 switch (GET_CODE (XEXP (x, 0)))
5266 base = XEXP (XEXP (x, 0), 0);
5267 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5271 base = XEXP (XEXP (x, 0), 0);
5272 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5275 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5276 fprintf (file, "%s(%s)",
5277 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5278 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5279 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5280 fprintf (file, "%s(%s)",
5281 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5282 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5283 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5284 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5286 /* Because the REG_POINTER flag can get lost during reload,
5287 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5288 index and base registers in the combined move patterns. */
5289 rtx base = XEXP (XEXP (x, 0), 1);
5290 rtx index = XEXP (XEXP (x, 0), 0);
5292 fprintf (file, "%s(%s)",
5293 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5296 output_address (XEXP (x, 0));
5299 output_address (XEXP (x, 0));
5304 output_addr_const (file, x);
5307 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5310 output_global_address (FILE *file, rtx x, int round_constant)
5313 /* Imagine (high (const (plus ...))). */
5314 if (GET_CODE (x) == HIGH)
5317 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5318 output_addr_const (file, x);
5319 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5321 output_addr_const (file, x);
5322 fputs ("-$global$", file);
5324 else if (GET_CODE (x) == CONST)
5326 const char *sep = "";
5327 int offset = 0; /* assembler wants -$global$ at end */
5328 rtx base = NULL_RTX;
5330 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5333 base = XEXP (XEXP (x, 0), 0);
5334 output_addr_const (file, base);
5337 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5343 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5346 base = XEXP (XEXP (x, 0), 1);
5347 output_addr_const (file, base);
5350 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5356 /* How bogus. The compiler is apparently responsible for
5357 rounding the constant if it uses an LR field selector.
5359 The linker and/or assembler seem a better place since
5360 they have to do this kind of thing already.
5362 If we fail to do this, HP's optimizing linker may eliminate
5363 an addil, but not update the ldw/stw/ldo instruction that
5364 uses the result of the addil. */
5366 offset = ((offset + 0x1000) & ~0x1fff);
5368 switch (GET_CODE (XEXP (x, 0)))
5381 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5389 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5390 fputs ("-$global$", file);
5392 fprintf (file, "%s%d", sep, offset);
5395 output_addr_const (file, x);
5398 /* Output boilerplate text to appear at the beginning of the file.
5399 There are several possible versions. */
5400 #define aputs(x) fputs(x, asm_out_file)
5402 pa_file_start_level (void)
5405 aputs ("\t.LEVEL 2.0w\n");
5406 else if (TARGET_PA_20)
5407 aputs ("\t.LEVEL 2.0\n");
5408 else if (TARGET_PA_11)
5409 aputs ("\t.LEVEL 1.1\n");
5411 aputs ("\t.LEVEL 1.0\n");
5415 pa_file_start_space (int sortspace)
5417 aputs ("\t.SPACE $PRIVATE$");
5420 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5421 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5422 "\n\t.SPACE $TEXT$");
5425 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5426 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5430 pa_file_start_file (int want_version)
5432 if (write_symbols != NO_DEBUG)
5434 output_file_directive (asm_out_file, main_input_filename);
5436 aputs ("\t.version\t\"01.01\"\n");
5441 pa_file_start_mcount (const char *aswhat)
5444 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5448 pa_elf_file_start (void)
5450 pa_file_start_level ();
5451 pa_file_start_mcount ("ENTRY");
5452 pa_file_start_file (0);
5456 pa_som_file_start (void)
5458 pa_file_start_level ();
5459 pa_file_start_space (0);
5460 aputs ("\t.IMPORT $global$,DATA\n"
5461 "\t.IMPORT $$dyncall,MILLICODE\n");
5462 pa_file_start_mcount ("CODE");
5463 pa_file_start_file (0);
5467 pa_linux_file_start (void)
5469 pa_file_start_file (1);
5470 pa_file_start_level ();
5471 pa_file_start_mcount ("CODE");
5475 pa_hpux64_gas_file_start (void)
5477 pa_file_start_level ();
5478 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5480 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5482 pa_file_start_file (1);
5486 pa_hpux64_hpas_file_start (void)
5488 pa_file_start_level ();
5489 pa_file_start_space (1);
5490 pa_file_start_mcount ("CODE");
5491 pa_file_start_file (0);
5495 /* Search the deferred plabel list for SYMBOL and return its internal
5496 label. If an entry for SYMBOL is not found, a new entry is created. */
5499 get_deferred_plabel (rtx symbol)
5501 const char *fname = XSTR (symbol, 0);
5504 /* See if we have already put this function on the list of deferred
5505 plabels. This list is generally small, so a liner search is not
5506 too ugly. If it proves too slow replace it with something faster. */
5507 for (i = 0; i < n_deferred_plabels; i++)
5508 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5511 /* If the deferred plabel list is empty, or this entry was not found
5512 on the list, create a new entry on the list. */
5513 if (deferred_plabels == NULL || i == n_deferred_plabels)
5517 if (deferred_plabels == 0)
5518 deferred_plabels = ggc_alloc_deferred_plabel ();
5520 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5522 n_deferred_plabels + 1);
5524 i = n_deferred_plabels++;
5525 deferred_plabels[i].internal_label = gen_label_rtx ();
5526 deferred_plabels[i].symbol = symbol;
5528 /* Gross. We have just implicitly taken the address of this
5529 function. Mark it in the same manner as assemble_name. */
5530 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5532 mark_referenced (id);
5535 return deferred_plabels[i].internal_label;
5539 output_deferred_plabels (void)
5543 /* If we have some deferred plabels, then we need to switch into the
5544 data or readonly data section, and align it to a 4 byte boundary
5545 before outputting the deferred plabels. */
5546 if (n_deferred_plabels)
5548 switch_to_section (flag_pic ? data_section : readonly_data_section);
5549 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5552 /* Now output the deferred plabels. */
5553 for (i = 0; i < n_deferred_plabels; i++)
5555 targetm.asm_out.internal_label (asm_out_file, "L",
5556 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5557 assemble_integer (deferred_plabels[i].symbol,
5558 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5562 #if HPUX_LONG_DOUBLE_LIBRARY
5563 /* Initialize optabs to point to HPUX long double emulation routines. */
5565 pa_hpux_init_libfuncs (void)
5567 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5568 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5569 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5570 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5571 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5572 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5573 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5574 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5575 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5577 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5578 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5579 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5580 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5581 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5582 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5583 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5585 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5586 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5587 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5588 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5590 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5591 ? "__U_Qfcnvfxt_quad_to_sgl"
5592 : "_U_Qfcnvfxt_quad_to_sgl");
5593 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5594 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5595 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5597 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5598 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5599 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5600 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5604 /* HP's millicode routines mean something special to the assembler.
5605 Keep track of which ones we have used. */
5607 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5608 static void import_milli (enum millicodes);
5609 static char imported[(int) end1000];
5610 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5611 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5612 #define MILLI_START 10
5615 import_milli (enum millicodes code)
5617 char str[sizeof (import_string)];
5619 if (!imported[(int) code])
5621 imported[(int) code] = 1;
5622 strcpy (str, import_string);
5623 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5624 output_asm_insn (str, 0);
5628 /* The register constraints have put the operands and return value in
5629 the proper registers. */
5632 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5634 import_milli (mulI);
5635 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5638 /* Emit the rtl for doing a division by a constant. */
5640 /* Do magic division millicodes exist for this value? */
5641 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5643 /* We'll use an array to keep track of the magic millicodes and
5644 whether or not we've used them already. [n][0] is signed, [n][1] is
5647 static int div_milli[16][2];
5650 emit_hpdiv_const (rtx *operands, int unsignedp)
5652 if (GET_CODE (operands[2]) == CONST_INT
5653 && INTVAL (operands[2]) > 0
5654 && INTVAL (operands[2]) < 16
5655 && magic_milli[INTVAL (operands[2])])
5657 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5659 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5663 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5664 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5666 gen_rtx_REG (SImode, 26),
5668 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5669 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5670 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5671 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5672 gen_rtx_CLOBBER (VOIDmode, ret))));
5673 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5680 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5684 /* If the divisor is a constant, try to use one of the special
5686 if (GET_CODE (operands[0]) == CONST_INT)
5688 static char buf[100];
5689 divisor = INTVAL (operands[0]);
5690 if (!div_milli[divisor][unsignedp])
5692 div_milli[divisor][unsignedp] = 1;
5694 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5696 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5700 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5701 INTVAL (operands[0]));
5702 return output_millicode_call (insn,
5703 gen_rtx_SYMBOL_REF (SImode, buf));
5707 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5708 INTVAL (operands[0]));
5709 return output_millicode_call (insn,
5710 gen_rtx_SYMBOL_REF (SImode, buf));
5713 /* Divisor isn't a special constant. */
5718 import_milli (divU);
5719 return output_millicode_call (insn,
5720 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5724 import_milli (divI);
5725 return output_millicode_call (insn,
5726 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5731 /* Output a $$rem millicode to do mod. */
5734 output_mod_insn (int unsignedp, rtx insn)
5738 import_milli (remU);
5739 return output_millicode_call (insn,
5740 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5744 import_milli (remI);
5745 return output_millicode_call (insn,
5746 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5751 output_arg_descriptor (rtx call_insn)
5753 const char *arg_regs[4];
5754 enum machine_mode arg_mode;
5756 int i, output_flag = 0;
5759 /* We neither need nor want argument location descriptors for the
5760 64bit runtime environment or the ELF32 environment. */
5761 if (TARGET_64BIT || TARGET_ELF32)
5764 for (i = 0; i < 4; i++)
5767 /* Specify explicitly that no argument relocations should take place
5768 if using the portable runtime calling conventions. */
5769 if (TARGET_PORTABLE_RUNTIME)
5771 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5776 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5777 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5778 link; link = XEXP (link, 1))
5780 rtx use = XEXP (link, 0);
5782 if (! (GET_CODE (use) == USE
5783 && GET_CODE (XEXP (use, 0)) == REG
5784 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5787 arg_mode = GET_MODE (XEXP (use, 0));
5788 regno = REGNO (XEXP (use, 0));
5789 if (regno >= 23 && regno <= 26)
5791 arg_regs[26 - regno] = "GR";
5792 if (arg_mode == DImode)
5793 arg_regs[25 - regno] = "GR";
5795 else if (regno >= 32 && regno <= 39)
5797 if (arg_mode == SFmode)
5798 arg_regs[(regno - 32) / 2] = "FR";
5801 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5802 arg_regs[(regno - 34) / 2] = "FR";
5803 arg_regs[(regno - 34) / 2 + 1] = "FU";
5805 arg_regs[(regno - 34) / 2] = "FU";
5806 arg_regs[(regno - 34) / 2 + 1] = "FR";
5811 fputs ("\t.CALL ", asm_out_file);
5812 for (i = 0; i < 4; i++)
5817 fputc (',', asm_out_file);
5818 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5821 fputc ('\n', asm_out_file);
5824 /* Inform reload about cases where moving X with a mode MODE to a register in
5825 RCLASS requires an extra scratch or immediate register. Return the class
5826 needed for the immediate register. */
5829 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5830 enum machine_mode mode, secondary_reload_info *sri)
5833 enum reg_class rclass = (enum reg_class) rclass_i;
5835 /* Handle the easy stuff first. */
5836 if (rclass == R1_REGS)
5842 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5848 /* If we have something like (mem (mem (...)), we can safely assume the
5849 inner MEM will end up in a general register after reloading, so there's
5850 no need for a secondary reload. */
5851 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5854 /* Trying to load a constant into a FP register during PIC code
5855 generation requires %r1 as a scratch register. */
5857 && (mode == SImode || mode == DImode)
5858 && FP_REG_CLASS_P (rclass)
5859 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5861 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5862 : CODE_FOR_reload_indi_r1);
5866 /* Secondary reloads of symbolic operands require %r1 as a scratch
5867 register when we're generating PIC code and when the operand isn't
5869 if (symbolic_expression_p (x))
5871 if (GET_CODE (x) == HIGH)
5874 if (flag_pic || !read_only_operand (x, VOIDmode))
5876 gcc_assert (mode == SImode || mode == DImode);
5877 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5878 : CODE_FOR_reload_indi_r1);
5883 /* Profiling showed the PA port spends about 1.3% of its compilation
5884 time in true_regnum from calls inside pa_secondary_reload_class. */
5885 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5886 regno = true_regnum (x);
5888 /* In order to allow 14-bit displacements in integer loads and stores,
5889 we need to prevent reload from generating out of range integer mode
5890 loads and stores to the floating point registers. Previously, we
5891 used to call for a secondary reload and have emit_move_sequence()
5892 fix the instruction sequence. However, reload occasionally wouldn't
5893 generate the reload and we would end up with an invalid REG+D memory
5894 address. So, now we use an intermediate general register for most
5895 memory loads and stores. */
5896 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5897 && GET_MODE_CLASS (mode) == MODE_INT
5898 && FP_REG_CLASS_P (rclass))
5900 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5901 the secondary reload needed for a pseudo. It never passes a
5903 if (GET_CODE (x) == MEM)
5907 /* We don't need an intermediate for indexed and LO_SUM DLT
5908 memory addresses. When INT14_OK_STRICT is true, it might
5909 appear that we could directly allow register indirect
5910 memory addresses. However, this doesn't work because we
5911 don't support SUBREGs in floating-point register copies
5912 and reload doesn't tell us when it's going to use a SUBREG. */
5913 if (IS_INDEX_ADDR_P (x)
5914 || IS_LO_SUM_DLT_ADDR_P (x))
5917 /* Otherwise, we need an intermediate general register. */
5918 return GENERAL_REGS;
5921 /* Request a secondary reload with a general scratch register
5922 for everthing else. ??? Could symbolic operands be handled
5923 directly when generating non-pic PA 2.0 code? */
5925 ? direct_optab_handler (reload_in_optab, mode)
5926 : direct_optab_handler (reload_out_optab, mode));
5930 /* A SAR<->FP register copy requires an intermediate general register
5931 and secondary memory. We need a secondary reload with a general
5932 scratch register for spills. */
5933 if (rclass == SHIFT_REGS)
5936 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
5939 ? direct_optab_handler (reload_in_optab, mode)
5940 : direct_optab_handler (reload_out_optab, mode));
5944 /* Handle FP copy. */
5945 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
5946 return GENERAL_REGS;
5949 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5950 && REGNO_REG_CLASS (regno) == SHIFT_REGS
5951 && FP_REG_CLASS_P (rclass))
5952 return GENERAL_REGS;
5957 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5958 is only marked as live on entry by df-scan when it is a fixed
5959 register. It isn't a fixed register in the 64-bit runtime,
5960 so we need to mark it here. */
5963 pa_extra_live_on_entry (bitmap regs)
5966 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5969 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5970 to prevent it from being deleted. */
5973 pa_eh_return_handler_rtx (void)
5977 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
5978 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5979 tmp = gen_rtx_MEM (word_mode, tmp);
5984 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5985 by invisible reference. As a GCC extension, we also pass anything
5986 with a zero or variable size by reference.
5988 The 64-bit runtime does not describe passing any types by invisible
5989 reference. The internals of GCC can't currently handle passing
5990 empty structures, and zero or variable length arrays when they are
5991 not passed entirely on the stack or by reference. Thus, as a GCC
5992 extension, we pass these types by reference. The HP compiler doesn't
5993 support these types, so hopefully there shouldn't be any compatibility
5994 issues. This may have to be revisited when HP releases a C99 compiler
5995 or updates the ABI. */
5998 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5999 enum machine_mode mode, const_tree type,
6000 bool named ATTRIBUTE_UNUSED)
6005 size = int_size_in_bytes (type);
6007 size = GET_MODE_SIZE (mode);
6012 return size <= 0 || size > 8;
6016 function_arg_padding (enum machine_mode mode, const_tree type)
6021 && (AGGREGATE_TYPE_P (type)
6022 || TREE_CODE (type) == COMPLEX_TYPE
6023 || TREE_CODE (type) == VECTOR_TYPE)))
6025 /* Return none if justification is not required. */
6027 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6028 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6031 /* The directions set here are ignored when a BLKmode argument larger
6032 than a word is placed in a register. Different code is used for
6033 the stack and registers. This makes it difficult to have a
6034 consistent data representation for both the stack and registers.
6035 For both runtimes, the justification and padding for arguments on
6036 the stack and in registers should be identical. */
6038 /* The 64-bit runtime specifies left justification for aggregates. */
6041 /* The 32-bit runtime architecture specifies right justification.
6042 When the argument is passed on the stack, the argument is padded
6043 with garbage on the left. The HP compiler pads with zeros. */
6047 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6054 /* Do what is necessary for `va_start'. We look at the current function
6055 to determine if stdargs or varargs is used and fill in an initial
6056 va_list. A pointer to this constructor is returned. */
6059 hppa_builtin_saveregs (void)
6062 tree fntype = TREE_TYPE (current_function_decl);
6063 int argadj = ((!stdarg_p (fntype))
6064 ? UNITS_PER_WORD : 0);
6067 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6069 offset = crtl->args.arg_offset_rtx;
6075 /* Adjust for varargs/stdarg differences. */
6077 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6079 offset = crtl->args.arg_offset_rtx;
6081 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6082 from the incoming arg pointer and growing to larger addresses. */
6083 for (i = 26, off = -64; i >= 19; i--, off += 8)
6084 emit_move_insn (gen_rtx_MEM (word_mode,
6085 plus_constant (arg_pointer_rtx, off)),
6086 gen_rtx_REG (word_mode, i));
6088 /* The incoming args pointer points just beyond the flushback area;
6089 normally this is not a serious concern. However, when we are doing
6090 varargs/stdargs we want to make the arg pointer point to the start
6091 of the incoming argument area. */
6092 emit_move_insn (virtual_incoming_args_rtx,
6093 plus_constant (arg_pointer_rtx, -64));
6095 /* Now return a pointer to the first anonymous argument. */
6096 return copy_to_reg (expand_binop (Pmode, add_optab,
6097 virtual_incoming_args_rtx,
6098 offset, 0, 0, OPTAB_LIB_WIDEN));
6101 /* Store general registers on the stack. */
6102 dest = gen_rtx_MEM (BLKmode,
6103 plus_constant (crtl->args.internal_arg_pointer,
6105 set_mem_alias_set (dest, get_varargs_alias_set ());
6106 set_mem_align (dest, BITS_PER_WORD);
6107 move_block_from_reg (23, dest, 4);
6109 /* move_block_from_reg will emit code to store the argument registers
6110 individually as scalar stores.
6112 However, other insns may later load from the same addresses for
6113 a structure load (passing a struct to a varargs routine).
6115 The alias code assumes that such aliasing can never happen, so we
6116 have to keep memory referencing insns from moving up beyond the
6117 last argument register store. So we emit a blockage insn here. */
6118 emit_insn (gen_blockage ());
6120 return copy_to_reg (expand_binop (Pmode, add_optab,
6121 crtl->args.internal_arg_pointer,
6122 offset, 0, 0, OPTAB_LIB_WIDEN));
6126 hppa_va_start (tree valist, rtx nextarg)
6128 nextarg = expand_builtin_saveregs ();
6129 std_expand_builtin_va_start (valist, nextarg);
6133 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6138 /* Args grow upward. We can use the generic routines. */
6139 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6141 else /* !TARGET_64BIT */
6143 tree ptr = build_pointer_type (type);
6146 unsigned int size, ofs;
6149 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6153 ptr = build_pointer_type (type);
6155 size = int_size_in_bytes (type);
6156 valist_type = TREE_TYPE (valist);
6158 /* Args grow down. Not handled by generic routines. */
6160 u = fold_convert (sizetype, size_in_bytes (type));
6161 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6162 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6164 /* Align to 4 or 8 byte boundary depending on argument size. */
6166 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6167 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6168 t = fold_convert (valist_type, t);
6170 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6172 ofs = (8 - size) % 4;
6176 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6179 t = fold_convert (ptr, t);
6180 t = build_va_arg_indirect_ref (t);
6183 t = build_va_arg_indirect_ref (t);
6189 /* True if MODE is valid for the target. By "valid", we mean able to
6190 be manipulated in non-trivial ways. In particular, this means all
6191 the arithmetic is supported.
6193 Currently, TImode is not valid as the HP 64-bit runtime documentation
6194 doesn't document the alignment and calling conventions for this type.
6195 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6196 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6199 pa_scalar_mode_supported_p (enum machine_mode mode)
6201 int precision = GET_MODE_PRECISION (mode);
6203 switch (GET_MODE_CLASS (mode))
6205 case MODE_PARTIAL_INT:
6207 if (precision == CHAR_TYPE_SIZE)
6209 if (precision == SHORT_TYPE_SIZE)
6211 if (precision == INT_TYPE_SIZE)
6213 if (precision == LONG_TYPE_SIZE)
6215 if (precision == LONG_LONG_TYPE_SIZE)
6220 if (precision == FLOAT_TYPE_SIZE)
6222 if (precision == DOUBLE_TYPE_SIZE)
6224 if (precision == LONG_DOUBLE_TYPE_SIZE)
6228 case MODE_DECIMAL_FLOAT:
6236 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6237 it branches into the delay slot. Otherwise, return FALSE. */
6240 branch_to_delay_slot_p (rtx insn)
6244 if (dbr_sequence_length ())
6247 jump_insn = next_active_insn (JUMP_LABEL (insn));
6250 insn = next_active_insn (insn);
6251 if (jump_insn == insn)
6254 /* We can't rely on the length of asms. So, we return FALSE when
6255 the branch is followed by an asm. */
6257 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6258 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6259 || get_attr_length (insn) > 0)
6266 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6268 This occurs when INSN has an unfilled delay slot and is followed
6269 by an asm. Disaster can occur if the asm is empty and the jump
6270 branches into the delay slot. So, we add a nop in the delay slot
6271 when this occurs. */
6274 branch_needs_nop_p (rtx insn)
6278 if (dbr_sequence_length ())
6281 jump_insn = next_active_insn (JUMP_LABEL (insn));
6284 insn = next_active_insn (insn);
6285 if (!insn || jump_insn == insn)
6288 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6289 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6290 && get_attr_length (insn) > 0)
6297 /* Return TRUE if INSN, a forward jump insn, can use nullification
6298 to skip the following instruction. This avoids an extra cycle due
6299 to a mis-predicted branch when we fall through. */
6302 use_skip_p (rtx insn)
6304 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6308 insn = next_active_insn (insn);
6310 /* We can't rely on the length of asms, so we can't skip asms. */
6312 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6313 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6315 if (get_attr_length (insn) == 4
6316 && jump_insn == next_active_insn (insn))
6318 if (get_attr_length (insn) > 0)
6325 /* This routine handles all the normal conditional branch sequences we
6326 might need to generate. It handles compare immediate vs compare
6327 register, nullification of delay slots, varying length branches,
6328 negated branches, and all combinations of the above. It returns the
6329 output appropriate to emit the branch corresponding to all given
6333 output_cbranch (rtx *operands, int negated, rtx insn)
6335 static char buf[100];
6337 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6338 int length = get_attr_length (insn);
6341 /* A conditional branch to the following instruction (e.g. the delay slot)
6342 is asking for a disaster. This can happen when not optimizing and
6343 when jump optimization fails.
6345 While it is usually safe to emit nothing, this can fail if the
6346 preceding instruction is a nullified branch with an empty delay
6347 slot and the same branch target as this branch. We could check
6348 for this but jump optimization should eliminate nop jumps. It
6349 is always safe to emit a nop. */
6350 if (branch_to_delay_slot_p (insn))
6353 /* The doubleword form of the cmpib instruction doesn't have the LEU
6354 and GTU conditions while the cmpb instruction does. Since we accept
6355 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6356 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6357 operands[2] = gen_rtx_REG (DImode, 0);
6358 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6359 operands[1] = gen_rtx_REG (DImode, 0);
6361 /* If this is a long branch with its delay slot unfilled, set `nullify'
6362 as it can nullify the delay slot and save a nop. */
6363 if (length == 8 && dbr_sequence_length () == 0)
6366 /* If this is a short forward conditional branch which did not get
6367 its delay slot filled, the delay slot can still be nullified. */
6368 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6369 nullify = forward_branch_p (insn);
6371 /* A forward branch over a single nullified insn can be done with a
6372 comclr instruction. This avoids a single cycle penalty due to
6373 mis-predicted branch if we fall through (branch not taken). */
6374 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6378 /* All short conditional branches except backwards with an unfilled
6382 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6384 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6385 if (GET_MODE (operands[1]) == DImode)
6388 strcat (buf, "%B3");
6390 strcat (buf, "%S3");
6392 strcat (buf, " %2,%r1,%%r0");
6395 if (branch_needs_nop_p (insn))
6396 strcat (buf, ",n %2,%r1,%0%#");
6398 strcat (buf, ",n %2,%r1,%0");
6401 strcat (buf, " %2,%r1,%0");
6404 /* All long conditionals. Note a short backward branch with an
6405 unfilled delay slot is treated just like a long backward branch
6406 with an unfilled delay slot. */
6408 /* Handle weird backwards branch with a filled delay slot
6409 which is nullified. */
6410 if (dbr_sequence_length () != 0
6411 && ! forward_branch_p (insn)
6414 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6415 if (GET_MODE (operands[1]) == DImode)
6418 strcat (buf, "%S3");
6420 strcat (buf, "%B3");
6421 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6423 /* Handle short backwards branch with an unfilled delay slot.
6424 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6425 taken and untaken branches. */
6426 else if (dbr_sequence_length () == 0
6427 && ! forward_branch_p (insn)
6428 && INSN_ADDRESSES_SET_P ()
6429 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6430 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6432 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6433 if (GET_MODE (operands[1]) == DImode)
6436 strcat (buf, "%B3 %2,%r1,%0%#");
6438 strcat (buf, "%S3 %2,%r1,%0%#");
6442 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6443 if (GET_MODE (operands[1]) == DImode)
6446 strcat (buf, "%S3");
6448 strcat (buf, "%B3");
6450 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6452 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6457 /* The reversed conditional branch must branch over one additional
6458 instruction if the delay slot is filled and needs to be extracted
6459 by output_lbranch. If the delay slot is empty or this is a
6460 nullified forward branch, the instruction after the reversed
6461 condition branch must be nullified. */
6462 if (dbr_sequence_length () == 0
6463 || (nullify && forward_branch_p (insn)))
6467 operands[4] = GEN_INT (length);
6472 operands[4] = GEN_INT (length + 4);
6475 /* Create a reversed conditional branch which branches around
6476 the following insns. */
6477 if (GET_MODE (operands[1]) != DImode)
6483 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6486 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6492 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6495 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6504 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6507 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6513 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6516 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6520 output_asm_insn (buf, operands);
6521 return output_lbranch (operands[0], insn, xdelay);
6526 /* This routine handles output of long unconditional branches that
6527 exceed the maximum range of a simple branch instruction. Since
6528 we don't have a register available for the branch, we save register
6529 %r1 in the frame marker, load the branch destination DEST into %r1,
6530 execute the branch, and restore %r1 in the delay slot of the branch.
6532 Since long branches may have an insn in the delay slot and the
6533 delay slot is used to restore %r1, we in general need to extract
6534 this insn and execute it before the branch. However, to facilitate
6535 use of this function by conditional branches, we also provide an
6536 option to not extract the delay insn so that it will be emitted
6537 after the long branch. So, if there is an insn in the delay slot,
6538 it is extracted if XDELAY is nonzero.
6540 The lengths of the various long-branch sequences are 20, 16 and 24
6541 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6544 output_lbranch (rtx dest, rtx insn, int xdelay)
6548 xoperands[0] = dest;
6550 /* First, free up the delay slot. */
6551 if (xdelay && dbr_sequence_length () != 0)
6553 /* We can't handle a jump in the delay slot. */
6554 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6556 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6559 /* Now delete the delay insn. */
6560 SET_INSN_DELETED (NEXT_INSN (insn));
6563 /* Output an insn to save %r1. The runtime documentation doesn't
6564 specify whether the "Clean Up" slot in the callers frame can
6565 be clobbered by the callee. It isn't copied by HP's builtin
6566 alloca, so this suggests that it can be clobbered if necessary.
6567 The "Static Link" location is copied by HP builtin alloca, so
6568 we avoid using it. Using the cleanup slot might be a problem
6569 if we have to interoperate with languages that pass cleanup
6570 information. However, it should be possible to handle these
6571 situations with GCC's asm feature.
6573 The "Current RP" slot is reserved for the called procedure, so
6574 we try to use it when we don't have a frame of our own. It's
6575 rather unlikely that we won't have a frame when we need to emit
6578 Really the way to go long term is a register scavenger; goto
6579 the target of the jump and find a register which we can use
6580 as a scratch to hold the value in %r1. Then, we wouldn't have
6581 to free up the delay slot or clobber a slot that may be needed
6582 for other purposes. */
6585 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6586 /* Use the return pointer slot in the frame marker. */
6587 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6589 /* Use the slot at -40 in the frame marker since HP builtin
6590 alloca doesn't copy it. */
6591 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6595 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6596 /* Use the return pointer slot in the frame marker. */
6597 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6599 /* Use the "Clean Up" slot in the frame marker. In GCC,
6600 the only other use of this location is for copying a
6601 floating point double argument from a floating-point
6602 register to two general registers. The copy is done
6603 as an "atomic" operation when outputting a call, so it
6604 won't interfere with our using the location here. */
6605 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6608 if (TARGET_PORTABLE_RUNTIME)
6610 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6611 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6612 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6616 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6617 if (TARGET_SOM || !TARGET_GAS)
6619 xoperands[1] = gen_label_rtx ();
6620 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6621 targetm.asm_out.internal_label (asm_out_file, "L",
6622 CODE_LABEL_NUMBER (xoperands[1]));
6623 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6627 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6628 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6630 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6633 /* Now output a very long branch to the original target. */
6634 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6636 /* Now restore the value of %r1 in the delay slot. */
6639 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6640 return "ldd -16(%%r30),%%r1";
6642 return "ldd -40(%%r30),%%r1";
6646 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6647 return "ldw -20(%%r30),%%r1";
6649 return "ldw -12(%%r30),%%r1";
6653 /* This routine handles all the branch-on-bit conditional branch sequences we
6654 might need to generate. It handles nullification of delay slots,
6655 varying length branches, negated branches and all combinations of the
6656 above. it returns the appropriate output template to emit the branch. */
6659 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6661 static char buf[100];
6663 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6664 int length = get_attr_length (insn);
6667 /* A conditional branch to the following instruction (e.g. the delay slot) is
6668 asking for a disaster. I do not think this can happen as this pattern
6669 is only used when optimizing; jump optimization should eliminate the
6670 jump. But be prepared just in case. */
6672 if (branch_to_delay_slot_p (insn))
6675 /* If this is a long branch with its delay slot unfilled, set `nullify'
6676 as it can nullify the delay slot and save a nop. */
6677 if (length == 8 && dbr_sequence_length () == 0)
6680 /* If this is a short forward conditional branch which did not get
6681 its delay slot filled, the delay slot can still be nullified. */
6682 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6683 nullify = forward_branch_p (insn);
6685 /* A forward branch over a single nullified insn can be done with a
6686 extrs instruction. This avoids a single cycle penalty due to
6687 mis-predicted branch if we fall through (branch not taken). */
6688 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6693 /* All short conditional branches except backwards with an unfilled
6697 strcpy (buf, "{extrs,|extrw,s,}");
6699 strcpy (buf, "bb,");
6700 if (useskip && GET_MODE (operands[0]) == DImode)
6701 strcpy (buf, "extrd,s,*");
6702 else if (GET_MODE (operands[0]) == DImode)
6703 strcpy (buf, "bb,*");
6704 if ((which == 0 && negated)
6705 || (which == 1 && ! negated))
6710 strcat (buf, " %0,%1,1,%%r0");
6711 else if (nullify && negated)
6713 if (branch_needs_nop_p (insn))
6714 strcat (buf, ",n %0,%1,%3%#");
6716 strcat (buf, ",n %0,%1,%3");
6718 else if (nullify && ! negated)
6720 if (branch_needs_nop_p (insn))
6721 strcat (buf, ",n %0,%1,%2%#");
6723 strcat (buf, ",n %0,%1,%2");
6725 else if (! nullify && negated)
6726 strcat (buf, " %0,%1,%3");
6727 else if (! nullify && ! negated)
6728 strcat (buf, " %0,%1,%2");
6731 /* All long conditionals. Note a short backward branch with an
6732 unfilled delay slot is treated just like a long backward branch
6733 with an unfilled delay slot. */
6735 /* Handle weird backwards branch with a filled delay slot
6736 which is nullified. */
6737 if (dbr_sequence_length () != 0
6738 && ! forward_branch_p (insn)
6741 strcpy (buf, "bb,");
6742 if (GET_MODE (operands[0]) == DImode)
6744 if ((which == 0 && negated)
6745 || (which == 1 && ! negated))
6750 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6752 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6754 /* Handle short backwards branch with an unfilled delay slot.
6755 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6756 taken and untaken branches. */
6757 else if (dbr_sequence_length () == 0
6758 && ! forward_branch_p (insn)
6759 && INSN_ADDRESSES_SET_P ()
6760 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6761 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6763 strcpy (buf, "bb,");
6764 if (GET_MODE (operands[0]) == DImode)
6766 if ((which == 0 && negated)
6767 || (which == 1 && ! negated))
6772 strcat (buf, " %0,%1,%3%#");
6774 strcat (buf, " %0,%1,%2%#");
6778 if (GET_MODE (operands[0]) == DImode)
6779 strcpy (buf, "extrd,s,*");
6781 strcpy (buf, "{extrs,|extrw,s,}");
6782 if ((which == 0 && negated)
6783 || (which == 1 && ! negated))
6787 if (nullify && negated)
6788 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6789 else if (nullify && ! negated)
6790 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6792 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6794 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6799 /* The reversed conditional branch must branch over one additional
6800 instruction if the delay slot is filled and needs to be extracted
6801 by output_lbranch. If the delay slot is empty or this is a
6802 nullified forward branch, the instruction after the reversed
6803 condition branch must be nullified. */
6804 if (dbr_sequence_length () == 0
6805 || (nullify && forward_branch_p (insn)))
6809 operands[4] = GEN_INT (length);
6814 operands[4] = GEN_INT (length + 4);
6817 if (GET_MODE (operands[0]) == DImode)
6818 strcpy (buf, "bb,*");
6820 strcpy (buf, "bb,");
6821 if ((which == 0 && negated)
6822 || (which == 1 && !negated))
6827 strcat (buf, ",n %0,%1,.+%4");
6829 strcat (buf, " %0,%1,.+%4");
6830 output_asm_insn (buf, operands);
6831 return output_lbranch (negated ? operands[3] : operands[2],
6837 /* This routine handles all the branch-on-variable-bit conditional branch
6838 sequences we might need to generate. It handles nullification of delay
6839 slots, varying length branches, negated branches and all combinations
6840 of the above. it returns the appropriate output template to emit the
6844 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6846 static char buf[100];
6848 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6849 int length = get_attr_length (insn);
6852 /* A conditional branch to the following instruction (e.g. the delay slot) is
6853 asking for a disaster. I do not think this can happen as this pattern
6854 is only used when optimizing; jump optimization should eliminate the
6855 jump. But be prepared just in case. */
6857 if (branch_to_delay_slot_p (insn))
6860 /* If this is a long branch with its delay slot unfilled, set `nullify'
6861 as it can nullify the delay slot and save a nop. */
6862 if (length == 8 && dbr_sequence_length () == 0)
6865 /* If this is a short forward conditional branch which did not get
6866 its delay slot filled, the delay slot can still be nullified. */
6867 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6868 nullify = forward_branch_p (insn);
6870 /* A forward branch over a single nullified insn can be done with a
6871 extrs instruction. This avoids a single cycle penalty due to
6872 mis-predicted branch if we fall through (branch not taken). */
6873 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6878 /* All short conditional branches except backwards with an unfilled
6882 strcpy (buf, "{vextrs,|extrw,s,}");
6884 strcpy (buf, "{bvb,|bb,}");
6885 if (useskip && GET_MODE (operands[0]) == DImode)
6886 strcpy (buf, "extrd,s,*");
6887 else if (GET_MODE (operands[0]) == DImode)
6888 strcpy (buf, "bb,*");
6889 if ((which == 0 && negated)
6890 || (which == 1 && ! negated))
6895 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6896 else if (nullify && negated)
6898 if (branch_needs_nop_p (insn))
6899 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6901 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6903 else if (nullify && ! negated)
6905 if (branch_needs_nop_p (insn))
6906 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6908 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6910 else if (! nullify && negated)
6911 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6912 else if (! nullify && ! negated)
6913 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6916 /* All long conditionals. Note a short backward branch with an
6917 unfilled delay slot is treated just like a long backward branch
6918 with an unfilled delay slot. */
6920 /* Handle weird backwards branch with a filled delay slot
6921 which is nullified. */
6922 if (dbr_sequence_length () != 0
6923 && ! forward_branch_p (insn)
6926 strcpy (buf, "{bvb,|bb,}");
6927 if (GET_MODE (operands[0]) == DImode)
6929 if ((which == 0 && negated)
6930 || (which == 1 && ! negated))
6935 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6937 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6939 /* Handle short backwards branch with an unfilled delay slot.
6940 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6941 taken and untaken branches. */
6942 else if (dbr_sequence_length () == 0
6943 && ! forward_branch_p (insn)
6944 && INSN_ADDRESSES_SET_P ()
6945 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6946 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6948 strcpy (buf, "{bvb,|bb,}");
6949 if (GET_MODE (operands[0]) == DImode)
6951 if ((which == 0 && negated)
6952 || (which == 1 && ! negated))
6957 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6959 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6963 strcpy (buf, "{vextrs,|extrw,s,}");
6964 if (GET_MODE (operands[0]) == DImode)
6965 strcpy (buf, "extrd,s,*");
6966 if ((which == 0 && negated)
6967 || (which == 1 && ! negated))
6971 if (nullify && negated)
6972 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6973 else if (nullify && ! negated)
6974 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6976 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6978 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6983 /* The reversed conditional branch must branch over one additional
6984 instruction if the delay slot is filled and needs to be extracted
6985 by output_lbranch. If the delay slot is empty or this is a
6986 nullified forward branch, the instruction after the reversed
6987 condition branch must be nullified. */
6988 if (dbr_sequence_length () == 0
6989 || (nullify && forward_branch_p (insn)))
6993 operands[4] = GEN_INT (length);
6998 operands[4] = GEN_INT (length + 4);
7001 if (GET_MODE (operands[0]) == DImode)
7002 strcpy (buf, "bb,*");
7004 strcpy (buf, "{bvb,|bb,}");
7005 if ((which == 0 && negated)
7006 || (which == 1 && !negated))
7011 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7013 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7014 output_asm_insn (buf, operands);
7015 return output_lbranch (negated ? operands[3] : operands[2],
7021 /* Return the output template for emitting a dbra type insn.
7023 Note it may perform some output operations on its own before
7024 returning the final output string. */
7026 output_dbra (rtx *operands, rtx insn, int which_alternative)
7028 int length = get_attr_length (insn);
7030 /* A conditional branch to the following instruction (e.g. the delay slot) is
7031 asking for a disaster. Be prepared! */
7033 if (branch_to_delay_slot_p (insn))
7035 if (which_alternative == 0)
7036 return "ldo %1(%0),%0";
7037 else if (which_alternative == 1)
7039 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7040 output_asm_insn ("ldw -16(%%r30),%4", operands);
7041 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7042 return "{fldws|fldw} -16(%%r30),%0";
7046 output_asm_insn ("ldw %0,%4", operands);
7047 return "ldo %1(%4),%4\n\tstw %4,%0";
7051 if (which_alternative == 0)
7053 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7056 /* If this is a long branch with its delay slot unfilled, set `nullify'
7057 as it can nullify the delay slot and save a nop. */
7058 if (length == 8 && dbr_sequence_length () == 0)
7061 /* If this is a short forward conditional branch which did not get
7062 its delay slot filled, the delay slot can still be nullified. */
7063 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7064 nullify = forward_branch_p (insn);
7071 if (branch_needs_nop_p (insn))
7072 return "addib,%C2,n %1,%0,%3%#";
7074 return "addib,%C2,n %1,%0,%3";
7077 return "addib,%C2 %1,%0,%3";
7080 /* Handle weird backwards branch with a fulled delay slot
7081 which is nullified. */
7082 if (dbr_sequence_length () != 0
7083 && ! forward_branch_p (insn)
7085 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7086 /* Handle short backwards branch with an unfilled delay slot.
7087 Using a addb;nop rather than addi;bl saves 1 cycle for both
7088 taken and untaken branches. */
7089 else if (dbr_sequence_length () == 0
7090 && ! forward_branch_p (insn)
7091 && INSN_ADDRESSES_SET_P ()
7092 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7093 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7094 return "addib,%C2 %1,%0,%3%#";
7096 /* Handle normal cases. */
7098 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7100 return "addi,%N2 %1,%0,%0\n\tb %3";
7103 /* The reversed conditional branch must branch over one additional
7104 instruction if the delay slot is filled and needs to be extracted
7105 by output_lbranch. If the delay slot is empty or this is a
7106 nullified forward branch, the instruction after the reversed
7107 condition branch must be nullified. */
7108 if (dbr_sequence_length () == 0
7109 || (nullify && forward_branch_p (insn)))
7113 operands[4] = GEN_INT (length);
7118 operands[4] = GEN_INT (length + 4);
7122 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7124 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7126 return output_lbranch (operands[3], insn, xdelay);
7130 /* Deal with gross reload from FP register case. */
7131 else if (which_alternative == 1)
7133 /* Move loop counter from FP register to MEM then into a GR,
7134 increment the GR, store the GR into MEM, and finally reload
7135 the FP register from MEM from within the branch's delay slot. */
7136 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7138 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7140 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7141 else if (length == 28)
7142 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7145 operands[5] = GEN_INT (length - 16);
7146 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7147 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7148 return output_lbranch (operands[3], insn, 0);
7151 /* Deal with gross reload from memory case. */
7154 /* Reload loop counter from memory, the store back to memory
7155 happens in the branch's delay slot. */
7156 output_asm_insn ("ldw %0,%4", operands);
7158 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7159 else if (length == 16)
7160 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7163 operands[5] = GEN_INT (length - 4);
7164 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7165 return output_lbranch (operands[3], insn, 0);
7170 /* Return the output template for emitting a movb type insn.
7172 Note it may perform some output operations on its own before
7173 returning the final output string. */
7175 output_movb (rtx *operands, rtx insn, int which_alternative,
7176 int reverse_comparison)
7178 int length = get_attr_length (insn);
7180 /* A conditional branch to the following instruction (e.g. the delay slot) is
7181 asking for a disaster. Be prepared! */
7183 if (branch_to_delay_slot_p (insn))
7185 if (which_alternative == 0)
7186 return "copy %1,%0";
7187 else if (which_alternative == 1)
7189 output_asm_insn ("stw %1,-16(%%r30)", operands);
7190 return "{fldws|fldw} -16(%%r30),%0";
7192 else if (which_alternative == 2)
7198 /* Support the second variant. */
7199 if (reverse_comparison)
7200 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7202 if (which_alternative == 0)
7204 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7207 /* If this is a long branch with its delay slot unfilled, set `nullify'
7208 as it can nullify the delay slot and save a nop. */
7209 if (length == 8 && dbr_sequence_length () == 0)
7212 /* If this is a short forward conditional branch which did not get
7213 its delay slot filled, the delay slot can still be nullified. */
7214 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7215 nullify = forward_branch_p (insn);
7222 if (branch_needs_nop_p (insn))
7223 return "movb,%C2,n %1,%0,%3%#";
7225 return "movb,%C2,n %1,%0,%3";
7228 return "movb,%C2 %1,%0,%3";
7231 /* Handle weird backwards branch with a filled delay slot
7232 which is nullified. */
7233 if (dbr_sequence_length () != 0
7234 && ! forward_branch_p (insn)
7236 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7238 /* Handle short backwards branch with an unfilled delay slot.
7239 Using a movb;nop rather than or;bl saves 1 cycle for both
7240 taken and untaken branches. */
7241 else if (dbr_sequence_length () == 0
7242 && ! forward_branch_p (insn)
7243 && INSN_ADDRESSES_SET_P ()
7244 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7245 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7246 return "movb,%C2 %1,%0,%3%#";
7247 /* Handle normal cases. */
7249 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7251 return "or,%N2 %1,%%r0,%0\n\tb %3";
7254 /* The reversed conditional branch must branch over one additional
7255 instruction if the delay slot is filled and needs to be extracted
7256 by output_lbranch. If the delay slot is empty or this is a
7257 nullified forward branch, the instruction after the reversed
7258 condition branch must be nullified. */
7259 if (dbr_sequence_length () == 0
7260 || (nullify && forward_branch_p (insn)))
7264 operands[4] = GEN_INT (length);
7269 operands[4] = GEN_INT (length + 4);
7273 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7275 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7277 return output_lbranch (operands[3], insn, xdelay);
7280 /* Deal with gross reload for FP destination register case. */
7281 else if (which_alternative == 1)
7283 /* Move source register to MEM, perform the branch test, then
7284 finally load the FP register from MEM from within the branch's
7286 output_asm_insn ("stw %1,-16(%%r30)", operands);
7288 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7289 else if (length == 16)
7290 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7293 operands[4] = GEN_INT (length - 4);
7294 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7295 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7296 return output_lbranch (operands[3], insn, 0);
7299 /* Deal with gross reload from memory case. */
7300 else if (which_alternative == 2)
7302 /* Reload loop counter from memory, the store back to memory
7303 happens in the branch's delay slot. */
7305 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7306 else if (length == 12)
7307 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7310 operands[4] = GEN_INT (length);
7311 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7313 return output_lbranch (operands[3], insn, 0);
7316 /* Handle SAR as a destination. */
7320 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7321 else if (length == 12)
7322 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7325 operands[4] = GEN_INT (length);
7326 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7328 return output_lbranch (operands[3], insn, 0);
7333 /* Copy any FP arguments in INSN into integer registers. */
7335 copy_fp_args (rtx insn)
7340 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7342 int arg_mode, regno;
7343 rtx use = XEXP (link, 0);
7345 if (! (GET_CODE (use) == USE
7346 && GET_CODE (XEXP (use, 0)) == REG
7347 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7350 arg_mode = GET_MODE (XEXP (use, 0));
7351 regno = REGNO (XEXP (use, 0));
7353 /* Is it a floating point register? */
7354 if (regno >= 32 && regno <= 39)
7356 /* Copy the FP register into an integer register via memory. */
7357 if (arg_mode == SFmode)
7359 xoperands[0] = XEXP (use, 0);
7360 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7361 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7362 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7366 xoperands[0] = XEXP (use, 0);
7367 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7368 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7369 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7370 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7376 /* Compute length of the FP argument copy sequence for INSN. */
7378 length_fp_args (rtx insn)
7383 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7385 int arg_mode, regno;
7386 rtx use = XEXP (link, 0);
7388 if (! (GET_CODE (use) == USE
7389 && GET_CODE (XEXP (use, 0)) == REG
7390 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7393 arg_mode = GET_MODE (XEXP (use, 0));
7394 regno = REGNO (XEXP (use, 0));
7396 /* Is it a floating point register? */
7397 if (regno >= 32 && regno <= 39)
7399 if (arg_mode == SFmode)
7409 /* Return the attribute length for the millicode call instruction INSN.
7410 The length must match the code generated by output_millicode_call.
7411 We include the delay slot in the returned length as it is better to
7412 over estimate the length than to under estimate it. */
7415 attr_length_millicode_call (rtx insn)
7417 unsigned long distance = -1;
7418 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7420 if (INSN_ADDRESSES_SET_P ())
7422 distance = (total + insn_current_reference_address (insn));
7423 if (distance < total)
7429 if (!TARGET_LONG_CALLS && distance < 7600000)
7434 else if (TARGET_PORTABLE_RUNTIME)
7438 if (!TARGET_LONG_CALLS && distance < 240000)
7441 if (TARGET_LONG_ABS_CALL && !flag_pic)
7448 /* INSN is a function call. It may have an unconditional jump
7451 CALL_DEST is the routine we are calling. */
7454 output_millicode_call (rtx insn, rtx call_dest)
7456 int attr_length = get_attr_length (insn);
7457 int seq_length = dbr_sequence_length ();
7462 xoperands[0] = call_dest;
7463 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7465 /* Handle the common case where we are sure that the branch will
7466 reach the beginning of the $CODE$ subspace. The within reach
7467 form of the $$sh_func_adrs call has a length of 28. Because
7468 it has an attribute type of multi, it never has a nonzero
7469 sequence length. The length of the $$sh_func_adrs is the same
7470 as certain out of reach PIC calls to other routines. */
7471 if (!TARGET_LONG_CALLS
7472 && ((seq_length == 0
7473 && (attr_length == 12
7474 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7475 || (seq_length != 0 && attr_length == 8)))
7477 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7483 /* It might seem that one insn could be saved by accessing
7484 the millicode function using the linkage table. However,
7485 this doesn't work in shared libraries and other dynamically
7486 loaded objects. Using a pc-relative sequence also avoids
7487 problems related to the implicit use of the gp register. */
7488 output_asm_insn ("b,l .+8,%%r1", xoperands);
7492 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7493 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7497 xoperands[1] = gen_label_rtx ();
7498 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7499 targetm.asm_out.internal_label (asm_out_file, "L",
7500 CODE_LABEL_NUMBER (xoperands[1]));
7501 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7504 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7506 else if (TARGET_PORTABLE_RUNTIME)
7508 /* Pure portable runtime doesn't allow be/ble; we also don't
7509 have PIC support in the assembler/linker, so this sequence
7512 /* Get the address of our target into %r1. */
7513 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7514 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7516 /* Get our return address into %r31. */
7517 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7518 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7520 /* Jump to our target address in %r1. */
7521 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7525 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7527 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7529 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7533 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7534 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7536 if (TARGET_SOM || !TARGET_GAS)
7538 /* The HP assembler can generate relocations for the
7539 difference of two symbols. GAS can do this for a
7540 millicode symbol but not an arbitrary external
7541 symbol when generating SOM output. */
7542 xoperands[1] = gen_label_rtx ();
7543 targetm.asm_out.internal_label (asm_out_file, "L",
7544 CODE_LABEL_NUMBER (xoperands[1]));
7545 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7546 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7550 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7551 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7555 /* Jump to our target address in %r1. */
7556 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7560 if (seq_length == 0)
7561 output_asm_insn ("nop", xoperands);
7563 /* We are done if there isn't a jump in the delay slot. */
7564 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7567 /* This call has an unconditional jump in its delay slot. */
7568 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7570 /* See if the return address can be adjusted. Use the containing
7571 sequence insn's address. */
7572 if (INSN_ADDRESSES_SET_P ())
7574 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7575 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7576 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7578 if (VAL_14_BITS_P (distance))
7580 xoperands[1] = gen_label_rtx ();
7581 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7582 targetm.asm_out.internal_label (asm_out_file, "L",
7583 CODE_LABEL_NUMBER (xoperands[1]));
7586 /* ??? This branch may not reach its target. */
7587 output_asm_insn ("nop\n\tb,n %0", xoperands);
7590 /* ??? This branch may not reach its target. */
7591 output_asm_insn ("nop\n\tb,n %0", xoperands);
7593 /* Delete the jump. */
7594 SET_INSN_DELETED (NEXT_INSN (insn));
7599 /* Return the attribute length of the call instruction INSN. The SIBCALL
7600 flag indicates whether INSN is a regular call or a sibling call. The
7601 length returned must be longer than the code actually generated by
7602 output_call. Since branch shortening is done before delay branch
7603 sequencing, there is no way to determine whether or not the delay
7604 slot will be filled during branch shortening. Even when the delay
7605 slot is filled, we may have to add a nop if the delay slot contains
7606 a branch that can't reach its target. Thus, we always have to include
7607 the delay slot in the length estimate. This used to be done in
7608 pa_adjust_insn_length but we do it here now as some sequences always
7609 fill the delay slot and we can save four bytes in the estimate for
7613 attr_length_call (rtx insn, int sibcall)
7616 rtx call, call_dest;
7619 rtx pat = PATTERN (insn);
7620 unsigned long distance = -1;
7622 gcc_assert (GET_CODE (insn) == CALL_INSN);
7624 if (INSN_ADDRESSES_SET_P ())
7626 unsigned long total;
7628 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7629 distance = (total + insn_current_reference_address (insn));
7630 if (distance < total)
7634 gcc_assert (GET_CODE (pat) == PARALLEL);
7636 /* Get the call rtx. */
7637 call = XVECEXP (pat, 0, 0);
7638 if (GET_CODE (call) == SET)
7639 call = SET_SRC (call);
7641 gcc_assert (GET_CODE (call) == CALL);
7643 /* Determine if this is a local call. */
7644 call_dest = XEXP (XEXP (call, 0), 0);
7645 call_decl = SYMBOL_REF_DECL (call_dest);
7646 local_call = call_decl && targetm.binds_local_p (call_decl);
7648 /* pc-relative branch. */
7649 if (!TARGET_LONG_CALLS
7650 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7651 || distance < 240000))
7654 /* 64-bit plabel sequence. */
7655 else if (TARGET_64BIT && !local_call)
7656 length += sibcall ? 28 : 24;
7658 /* non-pic long absolute branch sequence. */
7659 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7662 /* long pc-relative branch sequence. */
7663 else if (TARGET_LONG_PIC_SDIFF_CALL
7664 || (TARGET_GAS && !TARGET_SOM
7665 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7669 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7673 /* 32-bit plabel sequence. */
7679 length += length_fp_args (insn);
7689 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7697 /* INSN is a function call. It may have an unconditional jump
7700 CALL_DEST is the routine we are calling. */
7703 output_call (rtx insn, rtx call_dest, int sibcall)
7705 int delay_insn_deleted = 0;
7706 int delay_slot_filled = 0;
7707 int seq_length = dbr_sequence_length ();
7708 tree call_decl = SYMBOL_REF_DECL (call_dest);
7709 int local_call = call_decl && targetm.binds_local_p (call_decl);
7712 xoperands[0] = call_dest;
7714 /* Handle the common case where we're sure that the branch will reach
7715 the beginning of the "$CODE$" subspace. This is the beginning of
7716 the current function if we are in a named section. */
7717 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7719 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7720 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7724 if (TARGET_64BIT && !local_call)
7726 /* ??? As far as I can tell, the HP linker doesn't support the
7727 long pc-relative sequence described in the 64-bit runtime
7728 architecture. So, we use a slightly longer indirect call. */
7729 xoperands[0] = get_deferred_plabel (call_dest);
7730 xoperands[1] = gen_label_rtx ();
7732 /* If this isn't a sibcall, we put the load of %r27 into the
7733 delay slot. We can't do this in a sibcall as we don't
7734 have a second call-clobbered scratch register available. */
7736 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7739 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7742 /* Now delete the delay insn. */
7743 SET_INSN_DELETED (NEXT_INSN (insn));
7744 delay_insn_deleted = 1;
7747 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7748 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7749 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7753 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7754 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7755 output_asm_insn ("bve (%%r1)", xoperands);
7759 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7760 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7761 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7762 delay_slot_filled = 1;
7767 int indirect_call = 0;
7769 /* Emit a long call. There are several different sequences
7770 of increasing length and complexity. In most cases,
7771 they don't allow an instruction in the delay slot. */
7772 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7773 && !TARGET_LONG_PIC_SDIFF_CALL
7774 && !(TARGET_GAS && !TARGET_SOM
7775 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7780 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7784 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7786 /* A non-jump insn in the delay slot. By definition we can
7787 emit this insn before the call (and in fact before argument
7789 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7792 /* Now delete the delay insn. */
7793 SET_INSN_DELETED (NEXT_INSN (insn));
7794 delay_insn_deleted = 1;
7797 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7799 /* This is the best sequence for making long calls in
7800 non-pic code. Unfortunately, GNU ld doesn't provide
7801 the stub needed for external calls, and GAS's support
7802 for this with the SOM linker is buggy. It is safe
7803 to use this for local calls. */
7804 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7806 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7810 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7813 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7815 output_asm_insn ("copy %%r31,%%r2", xoperands);
7816 delay_slot_filled = 1;
7821 if (TARGET_LONG_PIC_SDIFF_CALL)
7823 /* The HP assembler and linker can handle relocations
7824 for the difference of two symbols. The HP assembler
7825 recognizes the sequence as a pc-relative call and
7826 the linker provides stubs when needed. */
7827 xoperands[1] = gen_label_rtx ();
7828 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7829 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7830 targetm.asm_out.internal_label (asm_out_file, "L",
7831 CODE_LABEL_NUMBER (xoperands[1]));
7832 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7834 else if (TARGET_GAS && !TARGET_SOM
7835 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7837 /* GAS currently can't generate the relocations that
7838 are needed for the SOM linker under HP-UX using this
7839 sequence. The GNU linker doesn't generate the stubs
7840 that are needed for external calls on TARGET_ELF32
7841 with this sequence. For now, we have to use a
7842 longer plabel sequence when using GAS. */
7843 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7844 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7846 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7851 /* Emit a long plabel-based call sequence. This is
7852 essentially an inline implementation of $$dyncall.
7853 We don't actually try to call $$dyncall as this is
7854 as difficult as calling the function itself. */
7855 xoperands[0] = get_deferred_plabel (call_dest);
7856 xoperands[1] = gen_label_rtx ();
7858 /* Since the call is indirect, FP arguments in registers
7859 need to be copied to the general registers. Then, the
7860 argument relocation stub will copy them back. */
7862 copy_fp_args (insn);
7866 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7867 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7868 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7872 output_asm_insn ("addil LR'%0-$global$,%%r27",
7874 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7878 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7879 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7880 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7881 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7883 if (!sibcall && !TARGET_PA_20)
7885 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7886 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7887 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7889 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7896 output_asm_insn ("bve (%%r1)", xoperands);
7901 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7902 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7903 delay_slot_filled = 1;
7906 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7911 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7912 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7917 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7918 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7920 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7924 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7925 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7927 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7930 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7932 output_asm_insn ("copy %%r31,%%r2", xoperands);
7933 delay_slot_filled = 1;
7940 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7941 output_asm_insn ("nop", xoperands);
7943 /* We are done if there isn't a jump in the delay slot. */
7945 || delay_insn_deleted
7946 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7949 /* A sibcall should never have a branch in the delay slot. */
7950 gcc_assert (!sibcall);
7952 /* This call has an unconditional jump in its delay slot. */
7953 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7955 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7957 /* See if the return address can be adjusted. Use the containing
7958 sequence insn's address. This would break the regular call/return@
7959 relationship assumed by the table based eh unwinder, so only do that
7960 if the call is not possibly throwing. */
7961 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7962 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7963 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7965 if (VAL_14_BITS_P (distance)
7966 && !(can_throw_internal (insn) || can_throw_external (insn)))
7968 xoperands[1] = gen_label_rtx ();
7969 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7970 targetm.asm_out.internal_label (asm_out_file, "L",
7971 CODE_LABEL_NUMBER (xoperands[1]));
7974 output_asm_insn ("nop\n\tb,n %0", xoperands);
7977 output_asm_insn ("b,n %0", xoperands);
7979 /* Delete the jump. */
7980 SET_INSN_DELETED (NEXT_INSN (insn));
7985 /* Return the attribute length of the indirect call instruction INSN.
7986 The length must match the code generated by output_indirect call.
7987 The returned length includes the delay slot. Currently, the delay
7988 slot of an indirect call sequence is not exposed and it is used by
7989 the sequence itself. */
7992 attr_length_indirect_call (rtx insn)
7994 unsigned long distance = -1;
7995 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7997 if (INSN_ADDRESSES_SET_P ())
7999 distance = (total + insn_current_reference_address (insn));
8000 if (distance < total)
8007 if (TARGET_FAST_INDIRECT_CALLS
8008 || (!TARGET_PORTABLE_RUNTIME
8009 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8010 || distance < 240000)))
8016 if (TARGET_PORTABLE_RUNTIME)
8019 /* Out of reach, can use ble. */
8024 output_indirect_call (rtx insn, rtx call_dest)
8030 xoperands[0] = call_dest;
8031 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8032 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8036 /* First the special case for kernels, level 0 systems, etc. */
8037 if (TARGET_FAST_INDIRECT_CALLS)
8038 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8040 /* Now the normal case -- we can reach $$dyncall directly or
8041 we're sure that we can get there via a long-branch stub.
8043 No need to check target flags as the length uniquely identifies
8044 the remaining cases. */
8045 if (attr_length_indirect_call (insn) == 8)
8047 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8048 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8049 variant of the B,L instruction can't be used on the SOM target. */
8050 if (TARGET_PA_20 && !TARGET_SOM)
8051 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8053 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8056 /* Long millicode call, but we are not generating PIC or portable runtime
8058 if (attr_length_indirect_call (insn) == 12)
8059 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8061 /* Long millicode call for portable runtime. */
8062 if (attr_length_indirect_call (insn) == 20)
8063 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8065 /* We need a long PIC call to $$dyncall. */
8066 xoperands[0] = NULL_RTX;
8067 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8068 if (TARGET_SOM || !TARGET_GAS)
8070 xoperands[0] = gen_label_rtx ();
8071 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8072 targetm.asm_out.internal_label (asm_out_file, "L",
8073 CODE_LABEL_NUMBER (xoperands[0]));
8074 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8078 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8079 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8082 output_asm_insn ("blr %%r0,%%r2", xoperands);
8083 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8087 /* Return the total length of the save and restore instructions needed for
8088 the data linkage table pointer (i.e., the PIC register) across the call
8089 instruction INSN. No-return calls do not require a save and restore.
8090 In addition, we may be able to avoid the save and restore for calls
8091 within the same translation unit. */
8094 attr_length_save_restore_dltp (rtx insn)
8096 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8102 /* In HPUX 8.0's shared library scheme, special relocations are needed
8103 for function labels if they might be passed to a function
8104 in a shared library (because shared libraries don't live in code
8105 space), and special magic is needed to construct their address. */
8108 hppa_encode_label (rtx sym)
8110 const char *str = XSTR (sym, 0);
8111 int len = strlen (str) + 1;
8114 p = newstr = XALLOCAVEC (char, len + 1);
8118 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8122 pa_encode_section_info (tree decl, rtx rtl, int first)
8124 int old_referenced = 0;
8126 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8128 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8130 default_encode_section_info (decl, rtl, first);
8132 if (first && TEXT_SPACE_P (decl))
8134 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8135 if (TREE_CODE (decl) == FUNCTION_DECL)
8136 hppa_encode_label (XEXP (rtl, 0));
8138 else if (old_referenced)
8139 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8142 /* This is sort of inverse to pa_encode_section_info. */
8145 pa_strip_name_encoding (const char *str)
8147 str += (*str == '@');
8148 str += (*str == '*');
8152 /* Returns 1 if OP is a function label involved in a simple addition
8153 with a constant. Used to keep certain patterns from matching
8154 during instruction combination. */
8156 is_function_label_plus_const (rtx op)
8158 /* Strip off any CONST. */
8159 if (GET_CODE (op) == CONST)
8162 return (GET_CODE (op) == PLUS
8163 && function_label_operand (XEXP (op, 0), VOIDmode)
8164 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8167 /* Output assembly code for a thunk to FUNCTION. */
8170 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8171 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8174 static unsigned int current_thunk_number;
8175 int val_14 = VAL_14_BITS_P (delta);
8176 unsigned int old_last_address = last_address, nbytes = 0;
8180 xoperands[0] = XEXP (DECL_RTL (function), 0);
8181 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8182 xoperands[2] = GEN_INT (delta);
8184 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8185 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8187 /* Output the thunk. We know that the function is in the same
8188 translation unit (i.e., the same space) as the thunk, and that
8189 thunks are output after their method. Thus, we don't need an
8190 external branch to reach the function. With SOM and GAS,
8191 functions and thunks are effectively in different sections.
8192 Thus, we can always use a IA-relative branch and the linker
8193 will add a long branch stub if necessary.
8195 However, we have to be careful when generating PIC code on the
8196 SOM port to ensure that the sequence does not transfer to an
8197 import stub for the target function as this could clobber the
8198 return value saved at SP-24. This would also apply to the
8199 32-bit linux port if the multi-space model is implemented. */
8200 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8201 && !(flag_pic && TREE_PUBLIC (function))
8202 && (TARGET_GAS || last_address < 262132))
8203 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8204 && ((targetm.have_named_sections
8205 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8206 /* The GNU 64-bit linker has rather poor stub management.
8207 So, we use a long branch from thunks that aren't in
8208 the same section as the target function. */
8210 && (DECL_SECTION_NAME (thunk_fndecl)
8211 != DECL_SECTION_NAME (function)))
8212 || ((DECL_SECTION_NAME (thunk_fndecl)
8213 == DECL_SECTION_NAME (function))
8214 && last_address < 262132)))
8215 || (targetm.have_named_sections
8216 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8217 && DECL_SECTION_NAME (function) == NULL
8218 && last_address < 262132)
8219 || (!targetm.have_named_sections && last_address < 262132))))
8222 output_asm_insn ("addil L'%2,%%r26", xoperands);
8224 output_asm_insn ("b %0", xoperands);
8228 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8233 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8237 else if (TARGET_64BIT)
8239 /* We only have one call-clobbered scratch register, so we can't
8240 make use of the delay slot if delta doesn't fit in 14 bits. */
8243 output_asm_insn ("addil L'%2,%%r26", xoperands);
8244 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8247 output_asm_insn ("b,l .+8,%%r1", xoperands);
8251 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8252 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8256 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8257 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8262 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8263 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8268 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8272 else if (TARGET_PORTABLE_RUNTIME)
8274 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8275 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8278 output_asm_insn ("addil L'%2,%%r26", xoperands);
8280 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8284 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8289 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8293 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8295 /* The function is accessible from outside this module. The only
8296 way to avoid an import stub between the thunk and function is to
8297 call the function directly with an indirect sequence similar to
8298 that used by $$dyncall. This is possible because $$dyncall acts
8299 as the import stub in an indirect call. */
8300 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8301 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8302 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8303 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8304 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8305 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8306 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8307 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8308 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8312 output_asm_insn ("addil L'%2,%%r26", xoperands);
8318 output_asm_insn ("bve (%%r22)", xoperands);
8321 else if (TARGET_NO_SPACE_REGS)
8323 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8328 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8329 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8330 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8335 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8337 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8341 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8343 if (TARGET_SOM || !TARGET_GAS)
8345 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8346 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8350 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8351 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8355 output_asm_insn ("addil L'%2,%%r26", xoperands);
8357 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8361 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8366 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8373 output_asm_insn ("addil L'%2,%%r26", xoperands);
8375 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8376 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8380 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8385 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8390 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8392 if (TARGET_SOM && TARGET_GAS)
8394 /* We done with this subspace except possibly for some additional
8395 debug information. Forget that we are in this subspace to ensure
8396 that the next function is output in its own subspace. */
8398 cfun->machine->in_nsubspa = 2;
8401 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8403 switch_to_section (data_section);
8404 output_asm_insn (".align 4", xoperands);
8405 ASM_OUTPUT_LABEL (file, label);
8406 output_asm_insn (".word P'%0", xoperands);
8409 current_thunk_number++;
8410 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8411 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8412 last_address += nbytes;
8413 if (old_last_address > last_address)
8414 last_address = UINT_MAX;
8415 update_total_code_bytes (nbytes);
8418 /* Only direct calls to static functions are allowed to be sibling (tail)
8421 This restriction is necessary because some linker generated stubs will
8422 store return pointers into rp' in some cases which might clobber a
8423 live value already in rp'.
8425 In a sibcall the current function and the target function share stack
8426 space. Thus if the path to the current function and the path to the
8427 target function save a value in rp', they save the value into the
8428 same stack slot, which has undesirable consequences.
8430 Because of the deferred binding nature of shared libraries any function
8431 with external scope could be in a different load module and thus require
8432 rp' to be saved when calling that function. So sibcall optimizations
8433 can only be safe for static function.
8435 Note that GCC never needs return value relocations, so we don't have to
8436 worry about static calls with return value relocations (which require
8439 It is safe to perform a sibcall optimization when the target function
8440 will never return. */
8442 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8444 if (TARGET_PORTABLE_RUNTIME)
8447 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8448 single subspace mode and the call is not indirect. As far as I know,
8449 there is no operating system support for the multiple subspace mode.
8450 It might be possible to support indirect calls if we didn't use
8451 $$dyncall (see the indirect sequence generated in output_call). */
8453 return (decl != NULL_TREE);
8455 /* Sibcalls are not ok because the arg pointer register is not a fixed
8456 register. This prevents the sibcall optimization from occurring. In
8457 addition, there are problems with stub placement using GNU ld. This
8458 is because a normal sibcall branch uses a 17-bit relocation while
8459 a regular call branch uses a 22-bit relocation. As a result, more
8460 care needs to be taken in the placement of long-branch stubs. */
8464 /* Sibcalls are only ok within a translation unit. */
8465 return (decl && !TREE_PUBLIC (decl));
8468 /* ??? Addition is not commutative on the PA due to the weird implicit
8469 space register selection rules for memory addresses. Therefore, we
8470 don't consider a + b == b + a, as this might be inside a MEM. */
8472 pa_commutative_p (const_rtx x, int outer_code)
8474 return (COMMUTATIVE_P (x)
8475 && (TARGET_NO_SPACE_REGS
8476 || (outer_code != UNKNOWN && outer_code != MEM)
8477 || GET_CODE (x) != PLUS));
8480 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8481 use in fmpyadd instructions. */
8483 fmpyaddoperands (rtx *operands)
8485 enum machine_mode mode = GET_MODE (operands[0]);
8487 /* Must be a floating point mode. */
8488 if (mode != SFmode && mode != DFmode)
8491 /* All modes must be the same. */
8492 if (! (mode == GET_MODE (operands[1])
8493 && mode == GET_MODE (operands[2])
8494 && mode == GET_MODE (operands[3])
8495 && mode == GET_MODE (operands[4])
8496 && mode == GET_MODE (operands[5])))
8499 /* All operands must be registers. */
8500 if (! (GET_CODE (operands[1]) == REG
8501 && GET_CODE (operands[2]) == REG
8502 && GET_CODE (operands[3]) == REG
8503 && GET_CODE (operands[4]) == REG
8504 && GET_CODE (operands[5]) == REG))
8507 /* Only 2 real operands to the addition. One of the input operands must
8508 be the same as the output operand. */
8509 if (! rtx_equal_p (operands[3], operands[4])
8510 && ! rtx_equal_p (operands[3], operands[5]))
8513 /* Inout operand of add cannot conflict with any operands from multiply. */
8514 if (rtx_equal_p (operands[3], operands[0])
8515 || rtx_equal_p (operands[3], operands[1])
8516 || rtx_equal_p (operands[3], operands[2]))
8519 /* multiply cannot feed into addition operands. */
8520 if (rtx_equal_p (operands[4], operands[0])
8521 || rtx_equal_p (operands[5], operands[0]))
8524 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8526 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8527 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8528 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8529 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8530 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8531 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8534 /* Passed. Operands are suitable for fmpyadd. */
8538 #if !defined(USE_COLLECT2)
8540 pa_asm_out_constructor (rtx symbol, int priority)
8542 if (!function_label_operand (symbol, VOIDmode))
8543 hppa_encode_label (symbol);
8545 #ifdef CTORS_SECTION_ASM_OP
8546 default_ctor_section_asm_out_constructor (symbol, priority);
8548 # ifdef TARGET_ASM_NAMED_SECTION
8549 default_named_section_asm_out_constructor (symbol, priority);
8551 default_stabs_asm_out_constructor (symbol, priority);
8557 pa_asm_out_destructor (rtx symbol, int priority)
8559 if (!function_label_operand (symbol, VOIDmode))
8560 hppa_encode_label (symbol);
8562 #ifdef DTORS_SECTION_ASM_OP
8563 default_dtor_section_asm_out_destructor (symbol, priority);
8565 # ifdef TARGET_ASM_NAMED_SECTION
8566 default_named_section_asm_out_destructor (symbol, priority);
8568 default_stabs_asm_out_destructor (symbol, priority);
8574 /* This function places uninitialized global data in the bss section.
8575 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8576 function on the SOM port to prevent uninitialized global data from
8577 being placed in the data section. */
8580 pa_asm_output_aligned_bss (FILE *stream,
8582 unsigned HOST_WIDE_INT size,
8585 switch_to_section (bss_section);
8586 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8588 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8589 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8592 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8593 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8596 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8597 ASM_OUTPUT_LABEL (stream, name);
8598 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8601 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8602 that doesn't allow the alignment of global common storage to be directly
8603 specified. The SOM linker aligns common storage based on the rounded
8604 value of the NUM_BYTES parameter in the .comm directive. It's not
8605 possible to use the .align directive as it doesn't affect the alignment
8606 of the label associated with a .comm directive. */
8609 pa_asm_output_aligned_common (FILE *stream,
8611 unsigned HOST_WIDE_INT size,
8614 unsigned int max_common_align;
8616 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8617 if (align > max_common_align)
8619 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8620 "for global common data. Using %u",
8621 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8622 align = max_common_align;
8625 switch_to_section (bss_section);
8627 assemble_name (stream, name);
8628 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8629 MAX (size, align / BITS_PER_UNIT));
8632 /* We can't use .comm for local common storage as the SOM linker effectively
8633 treats the symbol as universal and uses the same storage for local symbols
8634 with the same name in different object files. The .block directive
8635 reserves an uninitialized block of storage. However, it's not common
8636 storage. Fortunately, GCC never requests common storage with the same
8637 name in any given translation unit. */
8640 pa_asm_output_aligned_local (FILE *stream,
8642 unsigned HOST_WIDE_INT size,
8645 switch_to_section (bss_section);
8646 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8649 fprintf (stream, "%s", LOCAL_ASM_OP);
8650 assemble_name (stream, name);
8651 fprintf (stream, "\n");
8654 ASM_OUTPUT_LABEL (stream, name);
8655 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8658 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8659 use in fmpysub instructions. */
8661 fmpysuboperands (rtx *operands)
8663 enum machine_mode mode = GET_MODE (operands[0]);
8665 /* Must be a floating point mode. */
8666 if (mode != SFmode && mode != DFmode)
8669 /* All modes must be the same. */
8670 if (! (mode == GET_MODE (operands[1])
8671 && mode == GET_MODE (operands[2])
8672 && mode == GET_MODE (operands[3])
8673 && mode == GET_MODE (operands[4])
8674 && mode == GET_MODE (operands[5])))
8677 /* All operands must be registers. */
8678 if (! (GET_CODE (operands[1]) == REG
8679 && GET_CODE (operands[2]) == REG
8680 && GET_CODE (operands[3]) == REG
8681 && GET_CODE (operands[4]) == REG
8682 && GET_CODE (operands[5]) == REG))
8685 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8686 operation, so operands[4] must be the same as operand[3]. */
8687 if (! rtx_equal_p (operands[3], operands[4]))
8690 /* multiply cannot feed into subtraction. */
8691 if (rtx_equal_p (operands[5], operands[0]))
8694 /* Inout operand of sub cannot conflict with any operands from multiply. */
8695 if (rtx_equal_p (operands[3], operands[0])
8696 || rtx_equal_p (operands[3], operands[1])
8697 || rtx_equal_p (operands[3], operands[2]))
8700 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8702 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8703 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8704 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8705 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8706 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8707 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8710 /* Passed. Operands are suitable for fmpysub. */
8714 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8715 constants for shadd instructions. */
8717 shadd_constant_p (int val)
8719 if (val == 2 || val == 4 || val == 8)
8725 /* Return TRUE if INSN branches forward. */
8728 forward_branch_p (rtx insn)
8730 rtx lab = JUMP_LABEL (insn);
8732 /* The INSN must have a jump label. */
8733 gcc_assert (lab != NULL_RTX);
8735 if (INSN_ADDRESSES_SET_P ())
8736 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8743 insn = NEXT_INSN (insn);
8749 /* Return 1 if INSN is in the delay slot of a call instruction. */
8751 jump_in_call_delay (rtx insn)
8754 if (GET_CODE (insn) != JUMP_INSN)
8757 if (PREV_INSN (insn)
8758 && PREV_INSN (PREV_INSN (insn))
8759 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8761 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8763 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8764 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8771 /* Output an unconditional move and branch insn. */
8774 output_parallel_movb (rtx *operands, rtx insn)
8776 int length = get_attr_length (insn);
8778 /* These are the cases in which we win. */
8780 return "mov%I1b,tr %1,%0,%2";
8782 /* None of the following cases win, but they don't lose either. */
8785 if (dbr_sequence_length () == 0)
8787 /* Nothing in the delay slot, fake it by putting the combined
8788 insn (the copy or add) in the delay slot of a bl. */
8789 if (GET_CODE (operands[1]) == CONST_INT)
8790 return "b %2\n\tldi %1,%0";
8792 return "b %2\n\tcopy %1,%0";
8796 /* Something in the delay slot, but we've got a long branch. */
8797 if (GET_CODE (operands[1]) == CONST_INT)
8798 return "ldi %1,%0\n\tb %2";
8800 return "copy %1,%0\n\tb %2";
8804 if (GET_CODE (operands[1]) == CONST_INT)
8805 output_asm_insn ("ldi %1,%0", operands);
8807 output_asm_insn ("copy %1,%0", operands);
8808 return output_lbranch (operands[2], insn, 1);
8811 /* Output an unconditional add and branch insn. */
8814 output_parallel_addb (rtx *operands, rtx insn)
8816 int length = get_attr_length (insn);
8818 /* To make life easy we want operand0 to be the shared input/output
8819 operand and operand1 to be the readonly operand. */
8820 if (operands[0] == operands[1])
8821 operands[1] = operands[2];
8823 /* These are the cases in which we win. */
8825 return "add%I1b,tr %1,%0,%3";
8827 /* None of the following cases win, but they don't lose either. */
8830 if (dbr_sequence_length () == 0)
8831 /* Nothing in the delay slot, fake it by putting the combined
8832 insn (the copy or add) in the delay slot of a bl. */
8833 return "b %3\n\tadd%I1 %1,%0,%0";
8835 /* Something in the delay slot, but we've got a long branch. */
8836 return "add%I1 %1,%0,%0\n\tb %3";
8839 output_asm_insn ("add%I1 %1,%0,%0", operands);
8840 return output_lbranch (operands[3], insn, 1);
8843 /* Return nonzero if INSN (a jump insn) immediately follows a call
8844 to a named function. This is used to avoid filling the delay slot
8845 of the jump since it can usually be eliminated by modifying RP in
8846 the delay slot of the call. */
8849 following_call (rtx insn)
8851 if (! TARGET_JUMP_IN_DELAY)
8854 /* Find the previous real insn, skipping NOTEs. */
8855 insn = PREV_INSN (insn);
8856 while (insn && GET_CODE (insn) == NOTE)
8857 insn = PREV_INSN (insn);
8859 /* Check for CALL_INSNs and millicode calls. */
8861 && ((GET_CODE (insn) == CALL_INSN
8862 && get_attr_type (insn) != TYPE_DYNCALL)
8863 || (GET_CODE (insn) == INSN
8864 && GET_CODE (PATTERN (insn)) != SEQUENCE
8865 && GET_CODE (PATTERN (insn)) != USE
8866 && GET_CODE (PATTERN (insn)) != CLOBBER
8867 && get_attr_type (insn) == TYPE_MILLI)))
8873 /* We use this hook to perform a PA specific optimization which is difficult
8874 to do in earlier passes.
8876 We want the delay slots of branches within jump tables to be filled.
8877 None of the compiler passes at the moment even has the notion that a
8878 PA jump table doesn't contain addresses, but instead contains actual
8881 Because we actually jump into the table, the addresses of each entry
8882 must stay constant in relation to the beginning of the table (which
8883 itself must stay constant relative to the instruction to jump into
8884 it). I don't believe we can guarantee earlier passes of the compiler
8885 will adhere to those rules.
8887 So, late in the compilation process we find all the jump tables, and
8888 expand them into real code -- e.g. each entry in the jump table vector
8889 will get an appropriate label followed by a jump to the final target.
8891 Reorg and the final jump pass can then optimize these branches and
8892 fill their delay slots. We end up with smaller, more efficient code.
8894 The jump instructions within the table are special; we must be able
8895 to identify them during assembly output (if the jumps don't get filled
8896 we need to emit a nop rather than nullifying the delay slot)). We
8897 identify jumps in switch tables by using insns with the attribute
8898 type TYPE_BTABLE_BRANCH.
8900 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8901 insns. This serves two purposes, first it prevents jump.c from
8902 noticing that the last N entries in the table jump to the instruction
8903 immediately after the table and deleting the jumps. Second, those
8904 insns mark where we should emit .begin_brtab and .end_brtab directives
8905 when using GAS (allows for better link time optimizations). */
8912 remove_useless_addtr_insns (1);
8914 if (pa_cpu < PROCESSOR_8000)
8915 pa_combine_instructions ();
8918 /* This is fairly cheap, so always run it if optimizing. */
8919 if (optimize > 0 && !TARGET_BIG_SWITCH)
8921 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8922 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8924 rtx pattern, tmp, location, label;
8925 unsigned int length, i;
8927 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8928 if (GET_CODE (insn) != JUMP_INSN
8929 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8930 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8933 /* Emit marker for the beginning of the branch table. */
8934 emit_insn_before (gen_begin_brtab (), insn);
8936 pattern = PATTERN (insn);
8937 location = PREV_INSN (insn);
8938 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8940 for (i = 0; i < length; i++)
8942 /* Emit a label before each jump to keep jump.c from
8943 removing this code. */
8944 tmp = gen_label_rtx ();
8945 LABEL_NUSES (tmp) = 1;
8946 emit_label_after (tmp, location);
8947 location = NEXT_INSN (location);
8949 if (GET_CODE (pattern) == ADDR_VEC)
8950 label = XEXP (XVECEXP (pattern, 0, i), 0);
8952 label = XEXP (XVECEXP (pattern, 1, i), 0);
8954 tmp = gen_short_jump (label);
8956 /* Emit the jump itself. */
8957 tmp = emit_jump_insn_after (tmp, location);
8958 JUMP_LABEL (tmp) = label;
8959 LABEL_NUSES (label)++;
8960 location = NEXT_INSN (location);
8962 /* Emit a BARRIER after the jump. */
8963 emit_barrier_after (location);
8964 location = NEXT_INSN (location);
8967 /* Emit marker for the end of the branch table. */
8968 emit_insn_before (gen_end_brtab (), location);
8969 location = NEXT_INSN (location);
8970 emit_barrier_after (location);
8972 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8978 /* Still need brtab marker insns. FIXME: the presence of these
8979 markers disables output of the branch table to readonly memory,
8980 and any alignment directives that might be needed. Possibly,
8981 the begin_brtab insn should be output before the label for the
8982 table. This doesn't matter at the moment since the tables are
8983 always output in the text section. */
8984 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8986 /* Find an ADDR_VEC insn. */
8987 if (GET_CODE (insn) != JUMP_INSN
8988 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8989 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8992 /* Now generate markers for the beginning and end of the
8994 emit_insn_before (gen_begin_brtab (), insn);
8995 emit_insn_after (gen_end_brtab (), insn);
9000 /* The PA has a number of odd instructions which can perform multiple
9001 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9002 it may be profitable to combine two instructions into one instruction
9003 with two outputs. It's not profitable PA2.0 machines because the
9004 two outputs would take two slots in the reorder buffers.
9006 This routine finds instructions which can be combined and combines
9007 them. We only support some of the potential combinations, and we
9008 only try common ways to find suitable instructions.
9010 * addb can add two registers or a register and a small integer
9011 and jump to a nearby (+-8k) location. Normally the jump to the
9012 nearby location is conditional on the result of the add, but by
9013 using the "true" condition we can make the jump unconditional.
9014 Thus addb can perform two independent operations in one insn.
9016 * movb is similar to addb in that it can perform a reg->reg
9017 or small immediate->reg copy and jump to a nearby (+-8k location).
9019 * fmpyadd and fmpysub can perform a FP multiply and either an
9020 FP add or FP sub if the operands of the multiply and add/sub are
9021 independent (there are other minor restrictions). Note both
9022 the fmpy and fadd/fsub can in theory move to better spots according
9023 to data dependencies, but for now we require the fmpy stay at a
9026 * Many of the memory operations can perform pre & post updates
9027 of index registers. GCC's pre/post increment/decrement addressing
9028 is far too simple to take advantage of all the possibilities. This
9029 pass may not be suitable since those insns may not be independent.
9031 * comclr can compare two ints or an int and a register, nullify
9032 the following instruction and zero some other register. This
9033 is more difficult to use as it's harder to find an insn which
9034 will generate a comclr than finding something like an unconditional
9035 branch. (conditional moves & long branches create comclr insns).
9037 * Most arithmetic operations can conditionally skip the next
9038 instruction. They can be viewed as "perform this operation
9039 and conditionally jump to this nearby location" (where nearby
9040 is an insns away). These are difficult to use due to the
9041 branch length restrictions. */
9044 pa_combine_instructions (void)
9046 rtx anchor, new_rtx;
9048 /* This can get expensive since the basic algorithm is on the
9049 order of O(n^2) (or worse). Only do it for -O2 or higher
9050 levels of optimization. */
9054 /* Walk down the list of insns looking for "anchor" insns which
9055 may be combined with "floating" insns. As the name implies,
9056 "anchor" instructions don't move, while "floating" insns may
9058 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9059 new_rtx = make_insn_raw (new_rtx);
9061 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9063 enum attr_pa_combine_type anchor_attr;
9064 enum attr_pa_combine_type floater_attr;
9066 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9067 Also ignore any special USE insns. */
9068 if ((GET_CODE (anchor) != INSN
9069 && GET_CODE (anchor) != JUMP_INSN
9070 && GET_CODE (anchor) != CALL_INSN)
9071 || GET_CODE (PATTERN (anchor)) == USE
9072 || GET_CODE (PATTERN (anchor)) == CLOBBER
9073 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9074 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9077 anchor_attr = get_attr_pa_combine_type (anchor);
9078 /* See if anchor is an insn suitable for combination. */
9079 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9080 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9081 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9082 && ! forward_branch_p (anchor)))
9086 for (floater = PREV_INSN (anchor);
9088 floater = PREV_INSN (floater))
9090 if (GET_CODE (floater) == NOTE
9091 || (GET_CODE (floater) == INSN
9092 && (GET_CODE (PATTERN (floater)) == USE
9093 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9096 /* Anything except a regular INSN will stop our search. */
9097 if (GET_CODE (floater) != INSN
9098 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9099 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9105 /* See if FLOATER is suitable for combination with the
9107 floater_attr = get_attr_pa_combine_type (floater);
9108 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9109 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9110 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9111 && floater_attr == PA_COMBINE_TYPE_FMPY))
9113 /* If ANCHOR and FLOATER can be combined, then we're
9114 done with this pass. */
9115 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9116 SET_DEST (PATTERN (floater)),
9117 XEXP (SET_SRC (PATTERN (floater)), 0),
9118 XEXP (SET_SRC (PATTERN (floater)), 1)))
9122 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9123 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9125 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9127 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9128 SET_DEST (PATTERN (floater)),
9129 XEXP (SET_SRC (PATTERN (floater)), 0),
9130 XEXP (SET_SRC (PATTERN (floater)), 1)))
9135 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9136 SET_DEST (PATTERN (floater)),
9137 SET_SRC (PATTERN (floater)),
9138 SET_SRC (PATTERN (floater))))
9144 /* If we didn't find anything on the backwards scan try forwards. */
9146 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9147 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9149 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9151 if (GET_CODE (floater) == NOTE
9152 || (GET_CODE (floater) == INSN
9153 && (GET_CODE (PATTERN (floater)) == USE
9154 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9158 /* Anything except a regular INSN will stop our search. */
9159 if (GET_CODE (floater) != INSN
9160 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9161 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9167 /* See if FLOATER is suitable for combination with the
9169 floater_attr = get_attr_pa_combine_type (floater);
9170 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9171 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9172 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9173 && floater_attr == PA_COMBINE_TYPE_FMPY))
9175 /* If ANCHOR and FLOATER can be combined, then we're
9176 done with this pass. */
9177 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9178 SET_DEST (PATTERN (floater)),
9179 XEXP (SET_SRC (PATTERN (floater)),
9181 XEXP (SET_SRC (PATTERN (floater)),
9188 /* FLOATER will be nonzero if we found a suitable floating
9189 insn for combination with ANCHOR. */
9191 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9192 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9194 /* Emit the new instruction and delete the old anchor. */
9195 emit_insn_before (gen_rtx_PARALLEL
9197 gen_rtvec (2, PATTERN (anchor),
9198 PATTERN (floater))),
9201 SET_INSN_DELETED (anchor);
9203 /* Emit a special USE insn for FLOATER, then delete
9204 the floating insn. */
9205 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9206 delete_insn (floater);
9211 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9214 /* Emit the new_jump instruction and delete the old anchor. */
9216 = emit_jump_insn_before (gen_rtx_PARALLEL
9218 gen_rtvec (2, PATTERN (anchor),
9219 PATTERN (floater))),
9222 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9223 SET_INSN_DELETED (anchor);
9225 /* Emit a special USE insn for FLOATER, then delete
9226 the floating insn. */
9227 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9228 delete_insn (floater);
9236 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9239 int insn_code_number;
9242 /* Create a PARALLEL with the patterns of ANCHOR and
9243 FLOATER, try to recognize it, then test constraints
9244 for the resulting pattern.
9246 If the pattern doesn't match or the constraints
9247 aren't met keep searching for a suitable floater
9249 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9250 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9251 INSN_CODE (new_rtx) = -1;
9252 insn_code_number = recog_memoized (new_rtx);
9253 if (insn_code_number < 0
9254 || (extract_insn (new_rtx), ! constrain_operands (1)))
9268 /* There's up to three operands to consider. One
9269 output and two inputs.
9271 The output must not be used between FLOATER & ANCHOR
9272 exclusive. The inputs must not be set between
9273 FLOATER and ANCHOR exclusive. */
9275 if (reg_used_between_p (dest, start, end))
9278 if (reg_set_between_p (src1, start, end))
9281 if (reg_set_between_p (src2, start, end))
9284 /* If we get here, then everything is good. */
9288 /* Return nonzero if references for INSN are delayed.
9290 Millicode insns are actually function calls with some special
9291 constraints on arguments and register usage.
9293 Millicode calls always expect their arguments in the integer argument
9294 registers, and always return their result in %r29 (ret1). They
9295 are expected to clobber their arguments, %r1, %r29, and the return
9296 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9298 This function tells reorg that the references to arguments and
9299 millicode calls do not appear to happen until after the millicode call.
9300 This allows reorg to put insns which set the argument registers into the
9301 delay slot of the millicode call -- thus they act more like traditional
9304 Note we cannot consider side effects of the insn to be delayed because
9305 the branch and link insn will clobber the return pointer. If we happened
9306 to use the return pointer in the delay slot of the call, then we lose.
9308 get_attr_type will try to recognize the given insn, so make sure to
9309 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9312 insn_refs_are_delayed (rtx insn)
9314 return ((GET_CODE (insn) == INSN
9315 && GET_CODE (PATTERN (insn)) != SEQUENCE
9316 && GET_CODE (PATTERN (insn)) != USE
9317 && GET_CODE (PATTERN (insn)) != CLOBBER
9318 && get_attr_type (insn) == TYPE_MILLI));
9321 /* Promote the return value, but not the arguments. */
9323 static enum machine_mode
9324 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9325 enum machine_mode mode,
9326 int *punsignedp ATTRIBUTE_UNUSED,
9327 const_tree fntype ATTRIBUTE_UNUSED,
9330 if (for_return == 0)
9332 return promote_mode (type, mode, punsignedp);
9335 /* On the HP-PA the value is found in register(s) 28(-29), unless
9336 the mode is SF or DF. Then the value is returned in fr4 (32).
9338 This must perform the same promotions as PROMOTE_MODE, else promoting
9339 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9341 Small structures must be returned in a PARALLEL on PA64 in order
9342 to match the HP Compiler ABI. */
9345 pa_function_value (const_tree valtype,
9346 const_tree func ATTRIBUTE_UNUSED,
9347 bool outgoing ATTRIBUTE_UNUSED)
9349 enum machine_mode valmode;
9351 if (AGGREGATE_TYPE_P (valtype)
9352 || TREE_CODE (valtype) == COMPLEX_TYPE
9353 || TREE_CODE (valtype) == VECTOR_TYPE)
9357 /* Aggregates with a size less than or equal to 128 bits are
9358 returned in GR 28(-29). They are left justified. The pad
9359 bits are undefined. Larger aggregates are returned in
9363 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9365 for (i = 0; i < ub; i++)
9367 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9368 gen_rtx_REG (DImode, 28 + i),
9373 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9375 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9377 /* Aggregates 5 to 8 bytes in size are returned in general
9378 registers r28-r29 in the same manner as other non
9379 floating-point objects. The data is right-justified and
9380 zero-extended to 64 bits. This is opposite to the normal
9381 justification used on big endian targets and requires
9382 special treatment. */
9383 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9384 gen_rtx_REG (DImode, 28), const0_rtx);
9385 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9389 if ((INTEGRAL_TYPE_P (valtype)
9390 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9391 || POINTER_TYPE_P (valtype))
9392 valmode = word_mode;
9394 valmode = TYPE_MODE (valtype);
9396 if (TREE_CODE (valtype) == REAL_TYPE
9397 && !AGGREGATE_TYPE_P (valtype)
9398 && TYPE_MODE (valtype) != TFmode
9399 && !TARGET_SOFT_FLOAT)
9400 return gen_rtx_REG (valmode, 32);
9402 return gen_rtx_REG (valmode, 28);
9405 /* Implement the TARGET_LIBCALL_VALUE hook. */
9408 pa_libcall_value (enum machine_mode mode,
9409 const_rtx fun ATTRIBUTE_UNUSED)
9411 if (! TARGET_SOFT_FLOAT
9412 && (mode == SFmode || mode == DFmode))
9413 return gen_rtx_REG (mode, 32);
9415 return gen_rtx_REG (mode, 28);
9418 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9421 pa_function_value_regno_p (const unsigned int regno)
9424 || (! TARGET_SOFT_FLOAT && regno == 32))
9430 /* Update the data in CUM to advance over an argument
9431 of mode MODE and data type TYPE.
9432 (TYPE is null for libcalls where that information may not be available.) */
9435 pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9436 const_tree type, bool named ATTRIBUTE_UNUSED)
9438 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9440 cum->nargs_prototype--;
9441 cum->words += (arg_size
9442 + ((cum->words & 01)
9443 && type != NULL_TREE
9447 /* Return the location of a parameter that is passed in a register or NULL
9448 if the parameter has any component that is passed in memory.
9450 This is new code and will be pushed to into the net sources after
9453 ??? We might want to restructure this so that it looks more like other
9456 pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9457 const_tree type, bool named ATTRIBUTE_UNUSED)
9459 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9466 if (mode == VOIDmode)
9469 arg_size = FUNCTION_ARG_SIZE (mode, type);
9471 /* If this arg would be passed partially or totally on the stack, then
9472 this routine should return zero. pa_arg_partial_bytes will
9473 handle arguments which are split between regs and stack slots if
9474 the ABI mandates split arguments. */
9477 /* The 32-bit ABI does not split arguments. */
9478 if (cum->words + arg_size > max_arg_words)
9484 alignment = cum->words & 1;
9485 if (cum->words + alignment >= max_arg_words)
9489 /* The 32bit ABIs and the 64bit ABIs are rather different,
9490 particularly in their handling of FP registers. We might
9491 be able to cleverly share code between them, but I'm not
9492 going to bother in the hope that splitting them up results
9493 in code that is more easily understood. */
9497 /* Advance the base registers to their current locations.
9499 Remember, gprs grow towards smaller register numbers while
9500 fprs grow to higher register numbers. Also remember that
9501 although FP regs are 32-bit addressable, we pretend that
9502 the registers are 64-bits wide. */
9503 gpr_reg_base = 26 - cum->words;
9504 fpr_reg_base = 32 + cum->words;
9506 /* Arguments wider than one word and small aggregates need special
9510 || (type && (AGGREGATE_TYPE_P (type)
9511 || TREE_CODE (type) == COMPLEX_TYPE
9512 || TREE_CODE (type) == VECTOR_TYPE)))
9514 /* Double-extended precision (80-bit), quad-precision (128-bit)
9515 and aggregates including complex numbers are aligned on
9516 128-bit boundaries. The first eight 64-bit argument slots
9517 are associated one-to-one, with general registers r26
9518 through r19, and also with floating-point registers fr4
9519 through fr11. Arguments larger than one word are always
9520 passed in general registers.
9522 Using a PARALLEL with a word mode register results in left
9523 justified data on a big-endian target. */
9526 int i, offset = 0, ub = arg_size;
9528 /* Align the base register. */
9529 gpr_reg_base -= alignment;
9531 ub = MIN (ub, max_arg_words - cum->words - alignment);
9532 for (i = 0; i < ub; i++)
9534 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9535 gen_rtx_REG (DImode, gpr_reg_base),
9541 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9546 /* If the argument is larger than a word, then we know precisely
9547 which registers we must use. */
9561 /* Structures 5 to 8 bytes in size are passed in the general
9562 registers in the same manner as other non floating-point
9563 objects. The data is right-justified and zero-extended
9564 to 64 bits. This is opposite to the normal justification
9565 used on big endian targets and requires special treatment.
9566 We now define BLOCK_REG_PADDING to pad these objects.
9567 Aggregates, complex and vector types are passed in the same
9568 manner as structures. */
9570 || (type && (AGGREGATE_TYPE_P (type)
9571 || TREE_CODE (type) == COMPLEX_TYPE
9572 || TREE_CODE (type) == VECTOR_TYPE)))
9574 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9575 gen_rtx_REG (DImode, gpr_reg_base),
9577 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9582 /* We have a single word (32 bits). A simple computation
9583 will get us the register #s we need. */
9584 gpr_reg_base = 26 - cum->words;
9585 fpr_reg_base = 32 + 2 * cum->words;
9589 /* Determine if the argument needs to be passed in both general and
9590 floating point registers. */
9591 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9592 /* If we are doing soft-float with portable runtime, then there
9593 is no need to worry about FP regs. */
9594 && !TARGET_SOFT_FLOAT
9595 /* The parameter must be some kind of scalar float, else we just
9596 pass it in integer registers. */
9597 && GET_MODE_CLASS (mode) == MODE_FLOAT
9598 /* The target function must not have a prototype. */
9599 && cum->nargs_prototype <= 0
9600 /* libcalls do not need to pass items in both FP and general
9602 && type != NULL_TREE
9603 /* All this hair applies to "outgoing" args only. This includes
9604 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9606 /* Also pass outgoing floating arguments in both registers in indirect
9607 calls with the 32 bit ABI and the HP assembler since there is no
9608 way to the specify argument locations in static functions. */
9613 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9619 gen_rtx_EXPR_LIST (VOIDmode,
9620 gen_rtx_REG (mode, fpr_reg_base),
9622 gen_rtx_EXPR_LIST (VOIDmode,
9623 gen_rtx_REG (mode, gpr_reg_base),
9628 /* See if we should pass this parameter in a general register. */
9629 if (TARGET_SOFT_FLOAT
9630 /* Indirect calls in the normal 32bit ABI require all arguments
9631 to be passed in general registers. */
9632 || (!TARGET_PORTABLE_RUNTIME
9636 /* If the parameter is not a scalar floating-point parameter,
9637 then it belongs in GPRs. */
9638 || GET_MODE_CLASS (mode) != MODE_FLOAT
9639 /* Structure with single SFmode field belongs in GPR. */
9640 || (type && AGGREGATE_TYPE_P (type)))
9641 retval = gen_rtx_REG (mode, gpr_reg_base);
9643 retval = gen_rtx_REG (mode, fpr_reg_base);
9648 /* Arguments larger than one word are double word aligned. */
9651 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9653 bool singleword = (type
9654 ? (integer_zerop (TYPE_SIZE (type))
9655 || !TREE_CONSTANT (TYPE_SIZE (type))
9656 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9657 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9659 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9662 /* If this arg would be passed totally in registers or totally on the stack,
9663 then this routine should return zero. */
9666 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9667 tree type, bool named ATTRIBUTE_UNUSED)
9669 unsigned int max_arg_words = 8;
9670 unsigned int offset = 0;
9675 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9678 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9679 /* Arg fits fully into registers. */
9681 else if (cum->words + offset >= max_arg_words)
9682 /* Arg fully on the stack. */
9686 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9690 /* A get_unnamed_section callback for switching to the text section.
9692 This function is only used with SOM. Because we don't support
9693 named subspaces, we can only create a new subspace or switch back
9694 to the default text subspace. */
9697 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9699 gcc_assert (TARGET_SOM);
9702 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9704 /* We only want to emit a .nsubspa directive once at the
9705 start of the function. */
9706 cfun->machine->in_nsubspa = 1;
9708 /* Create a new subspace for the text. This provides
9709 better stub placement and one-only functions. */
9711 && DECL_ONE_ONLY (cfun->decl)
9712 && !DECL_WEAK (cfun->decl))
9714 output_section_asm_op ("\t.SPACE $TEXT$\n"
9715 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9716 "ACCESS=44,SORT=24,COMDAT");
9722 /* There isn't a current function or the body of the current
9723 function has been completed. So, we are changing to the
9724 text section to output debugging information. Thus, we
9725 need to forget that we are in the text section so that
9726 varasm.c will call us when text_section is selected again. */
9727 gcc_assert (!cfun || !cfun->machine
9728 || cfun->machine->in_nsubspa == 2);
9731 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9734 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9737 /* A get_unnamed_section callback for switching to comdat data
9738 sections. This function is only used with SOM. */
9741 som_output_comdat_data_section_asm_op (const void *data)
9744 output_section_asm_op (data);
9747 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9750 pa_som_asm_init_sections (void)
9753 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9755 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9756 is not being generated. */
9757 som_readonly_data_section
9758 = get_unnamed_section (0, output_section_asm_op,
9759 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9761 /* When secondary definitions are not supported, SOM makes readonly
9762 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9764 som_one_only_readonly_data_section
9765 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9767 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9768 "ACCESS=0x2c,SORT=16,COMDAT");
9771 /* When secondary definitions are not supported, SOM makes data one-only
9772 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9773 som_one_only_data_section
9774 = get_unnamed_section (SECTION_WRITE,
9775 som_output_comdat_data_section_asm_op,
9776 "\t.SPACE $PRIVATE$\n"
9777 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9778 "ACCESS=31,SORT=24,COMDAT");
9780 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9781 which reference data within the $TEXT$ space (for example constant
9782 strings in the $LIT$ subspace).
9784 The assemblers (GAS and HP as) both have problems with handling
9785 the difference of two symbols which is the other correct way to
9786 reference constant data during PIC code generation.
9788 So, there's no way to reference constant data which is in the
9789 $TEXT$ space during PIC generation. Instead place all constant
9790 data into the $PRIVATE$ subspace (this reduces sharing, but it
9791 works correctly). */
9792 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9794 /* We must not have a reference to an external symbol defined in a
9795 shared library in a readonly section, else the SOM linker will
9798 So, we force exception information into the data section. */
9799 exception_section = data_section;
9802 /* On hpux10, the linker will give an error if we have a reference
9803 in the read-only data section to a symbol defined in a shared
9804 library. Therefore, expressions that might require a reloc can
9805 not be placed in the read-only data section. */
9808 pa_select_section (tree exp, int reloc,
9809 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9811 if (TREE_CODE (exp) == VAR_DECL
9812 && TREE_READONLY (exp)
9813 && !TREE_THIS_VOLATILE (exp)
9814 && DECL_INITIAL (exp)
9815 && (DECL_INITIAL (exp) == error_mark_node
9816 || TREE_CONSTANT (DECL_INITIAL (exp)))
9820 && DECL_ONE_ONLY (exp)
9821 && !DECL_WEAK (exp))
9822 return som_one_only_readonly_data_section;
9824 return readonly_data_section;
9826 else if (CONSTANT_CLASS_P (exp) && !reloc)
9827 return readonly_data_section;
9829 && TREE_CODE (exp) == VAR_DECL
9830 && DECL_ONE_ONLY (exp)
9831 && !DECL_WEAK (exp))
9832 return som_one_only_data_section;
9834 return data_section;
9838 pa_globalize_label (FILE *stream, const char *name)
9840 /* We only handle DATA objects here, functions are globalized in
9841 ASM_DECLARE_FUNCTION_NAME. */
9842 if (! FUNCTION_NAME_P (name))
9844 fputs ("\t.EXPORT ", stream);
9845 assemble_name (stream, name);
9846 fputs (",DATA\n", stream);
9850 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9853 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9854 int incoming ATTRIBUTE_UNUSED)
9856 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9859 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9862 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9864 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9865 PA64 ABI says that objects larger than 128 bits are returned in memory.
9866 Note, int_size_in_bytes can return -1 if the size of the object is
9867 variable or larger than the maximum value that can be expressed as
9868 a HOST_WIDE_INT. It can also return zero for an empty type. The
9869 simplest way to handle variable and empty types is to pass them in
9870 memory. This avoids problems in defining the boundaries of argument
9871 slots, allocating registers, etc. */
9872 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9873 || int_size_in_bytes (type) <= 0);
9876 /* Structure to hold declaration and name of external symbols that are
9877 emitted by GCC. We generate a vector of these symbols and output them
9878 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9879 This avoids putting out names that are never really used. */
9881 typedef struct GTY(()) extern_symbol
9887 /* Define gc'd vector type for extern_symbol. */
9888 DEF_VEC_O(extern_symbol);
9889 DEF_VEC_ALLOC_O(extern_symbol,gc);
9891 /* Vector of extern_symbol pointers. */
9892 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9894 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9895 /* Mark DECL (name NAME) as an external reference (assembler output
9896 file FILE). This saves the names to output at the end of the file
9897 if actually referenced. */
9900 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9902 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9904 gcc_assert (file == asm_out_file);
9909 /* Output text required at the end of an assembler file.
9910 This includes deferred plabels and .import directives for
9911 all external symbols that were actually referenced. */
9914 pa_hpux_file_end (void)
9919 if (!NO_DEFERRED_PROFILE_COUNTERS)
9920 output_deferred_profile_counters ();
9922 output_deferred_plabels ();
9924 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9926 tree decl = p->decl;
9928 if (!TREE_ASM_WRITTEN (decl)
9929 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9930 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9933 VEC_free (extern_symbol, gc, extern_symbols);
9937 /* Return true if a change from mode FROM to mode TO for a register
9938 in register class RCLASS is invalid. */
9941 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9942 enum reg_class rclass)
9947 /* Reject changes to/from complex and vector modes. */
9948 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9949 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9952 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9955 /* There is no way to load QImode or HImode values directly from
9956 memory. SImode loads to the FP registers are not zero extended.
9957 On the 64-bit target, this conflicts with the definition of
9958 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9959 with different sizes in the floating-point registers. */
9960 if (MAYBE_FP_REG_CLASS_P (rclass))
9963 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9964 in specific sets of registers. Thus, we cannot allow changing
9965 to a larger mode when it's larger than a word. */
9966 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9967 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9973 /* Returns TRUE if it is a good idea to tie two pseudo registers
9974 when one has mode MODE1 and one has mode MODE2.
9975 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9976 for any hard reg, then this must be FALSE for correct output.
9978 We should return FALSE for QImode and HImode because these modes
9979 are not ok in the floating-point registers. However, this prevents
9980 tieing these modes to SImode and DImode in the general registers.
9981 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9982 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9983 in the floating-point registers. */
9986 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9988 /* Don't tie modes in different classes. */
9989 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9996 /* Length in units of the trampoline instruction code. */
9998 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10001 /* Output assembler code for a block containing the constant parts
10002 of a trampoline, leaving space for the variable parts.\
10004 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10005 and then branches to the specified routine.
10007 This code template is copied from text segment to stack location
10008 and then patched with pa_trampoline_init to contain valid values,
10009 and then entered as a subroutine.
10011 It is best to keep this as small as possible to avoid having to
10012 flush multiple lines in the cache. */
10015 pa_asm_trampoline_template (FILE *f)
10019 fputs ("\tldw 36(%r22),%r21\n", f);
10020 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10021 if (ASSEMBLER_DIALECT == 0)
10022 fputs ("\tdepi 0,31,2,%r21\n", f);
10024 fputs ("\tdepwi 0,31,2,%r21\n", f);
10025 fputs ("\tldw 4(%r21),%r19\n", f);
10026 fputs ("\tldw 0(%r21),%r21\n", f);
10029 fputs ("\tbve (%r21)\n", f);
10030 fputs ("\tldw 40(%r22),%r29\n", f);
10031 fputs ("\t.word 0\n", f);
10032 fputs ("\t.word 0\n", f);
10036 fputs ("\tldsid (%r21),%r1\n", f);
10037 fputs ("\tmtsp %r1,%sr0\n", f);
10038 fputs ("\tbe 0(%sr0,%r21)\n", f);
10039 fputs ("\tldw 40(%r22),%r29\n", f);
10041 fputs ("\t.word 0\n", f);
10042 fputs ("\t.word 0\n", f);
10043 fputs ("\t.word 0\n", f);
10044 fputs ("\t.word 0\n", f);
10048 fputs ("\t.dword 0\n", f);
10049 fputs ("\t.dword 0\n", f);
10050 fputs ("\t.dword 0\n", f);
10051 fputs ("\t.dword 0\n", f);
10052 fputs ("\tmfia %r31\n", f);
10053 fputs ("\tldd 24(%r31),%r1\n", f);
10054 fputs ("\tldd 24(%r1),%r27\n", f);
10055 fputs ("\tldd 16(%r1),%r1\n", f);
10056 fputs ("\tbve (%r1)\n", f);
10057 fputs ("\tldd 32(%r31),%r31\n", f);
10058 fputs ("\t.dword 0 ; fptr\n", f);
10059 fputs ("\t.dword 0 ; static link\n", f);
10063 /* Emit RTL insns to initialize the variable parts of a trampoline.
10064 FNADDR is an RTX for the address of the function's pure code.
10065 CXT is an RTX for the static chain value for the function.
10067 Move the function address to the trampoline template at offset 36.
10068 Move the static chain value to trampoline template at offset 40.
10069 Move the trampoline address to trampoline template at offset 44.
10070 Move r19 to trampoline template at offset 48. The latter two
10071 words create a plabel for the indirect call to the trampoline.
10073 A similar sequence is used for the 64-bit port but the plabel is
10074 at the beginning of the trampoline.
10076 Finally, the cache entries for the trampoline code are flushed.
10077 This is necessary to ensure that the trampoline instruction sequence
10078 is written to memory prior to any attempts at prefetching the code
10082 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10084 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10085 rtx start_addr = gen_reg_rtx (Pmode);
10086 rtx end_addr = gen_reg_rtx (Pmode);
10087 rtx line_length = gen_reg_rtx (Pmode);
10090 emit_block_move (m_tramp, assemble_trampoline_template (),
10091 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10092 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10096 tmp = adjust_address (m_tramp, Pmode, 36);
10097 emit_move_insn (tmp, fnaddr);
10098 tmp = adjust_address (m_tramp, Pmode, 40);
10099 emit_move_insn (tmp, chain_value);
10101 /* Create a fat pointer for the trampoline. */
10102 tmp = adjust_address (m_tramp, Pmode, 44);
10103 emit_move_insn (tmp, r_tramp);
10104 tmp = adjust_address (m_tramp, Pmode, 48);
10105 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10107 /* fdc and fic only use registers for the address to flush,
10108 they do not accept integer displacements. We align the
10109 start and end addresses to the beginning of their respective
10110 cache lines to minimize the number of lines flushed. */
10111 emit_insn (gen_andsi3 (start_addr, r_tramp,
10112 GEN_INT (-MIN_CACHELINE_SIZE)));
10113 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10114 emit_insn (gen_andsi3 (end_addr, tmp,
10115 GEN_INT (-MIN_CACHELINE_SIZE)));
10116 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10117 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10118 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10119 gen_reg_rtx (Pmode),
10120 gen_reg_rtx (Pmode)));
10124 tmp = adjust_address (m_tramp, Pmode, 56);
10125 emit_move_insn (tmp, fnaddr);
10126 tmp = adjust_address (m_tramp, Pmode, 64);
10127 emit_move_insn (tmp, chain_value);
10129 /* Create a fat pointer for the trampoline. */
10130 tmp = adjust_address (m_tramp, Pmode, 16);
10131 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10132 tmp = adjust_address (m_tramp, Pmode, 24);
10133 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10135 /* fdc and fic only use registers for the address to flush,
10136 they do not accept integer displacements. We align the
10137 start and end addresses to the beginning of their respective
10138 cache lines to minimize the number of lines flushed. */
10139 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10140 emit_insn (gen_anddi3 (start_addr, tmp,
10141 GEN_INT (-MIN_CACHELINE_SIZE)));
10142 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10143 emit_insn (gen_anddi3 (end_addr, tmp,
10144 GEN_INT (-MIN_CACHELINE_SIZE)));
10145 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10146 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10147 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10148 gen_reg_rtx (Pmode),
10149 gen_reg_rtx (Pmode)));
10153 /* Perform any machine-specific adjustment in the address of the trampoline.
10154 ADDR contains the address that was passed to pa_trampoline_init.
10155 Adjust the trampoline address to point to the plabel at offset 44. */
10158 pa_trampoline_adjust_address (rtx addr)
10161 addr = memory_address (Pmode, plus_constant (addr, 46));
10166 pa_delegitimize_address (rtx orig_x)
10168 rtx x = delegitimize_mem_from_attrs (orig_x);
10170 if (GET_CODE (x) == LO_SUM
10171 && GET_CODE (XEXP (x, 1)) == UNSPEC
10172 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10173 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10178 pa_internal_arg_pointer (void)
10180 /* The argument pointer and the hard frame pointer are the same in
10181 the 32-bit runtime, so we don't need a copy. */
10183 return copy_to_reg (virtual_incoming_args_rtx);
10185 return virtual_incoming_args_rtx;
10188 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10189 Frame pointer elimination is automatically handled. */
10192 pa_can_eliminate (const int from, const int to)
10194 /* The argument cannot be eliminated in the 64-bit runtime. */
10195 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10198 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10199 ? ! frame_pointer_needed
10203 /* Define the offset between two registers, FROM to be eliminated and its
10204 replacement TO, at the start of a routine. */
10206 pa_initial_elimination_offset (int from, int to)
10208 HOST_WIDE_INT offset;
10210 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10211 && to == STACK_POINTER_REGNUM)
10212 offset = -compute_frame_size (get_frame_size (), 0);
10213 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10216 gcc_unreachable ();
10222 pa_conditional_register_usage (void)
10226 if (!TARGET_64BIT && !TARGET_PA_11)
10228 for (i = 56; i <= FP_REG_LAST; i++)
10229 fixed_regs[i] = call_used_regs[i] = 1;
10230 for (i = 33; i < 56; i += 2)
10231 fixed_regs[i] = call_used_regs[i] = 1;
10233 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10235 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10236 fixed_regs[i] = call_used_regs[i] = 1;
10239 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10242 /* Target hook for c_mode_for_suffix. */
10244 static enum machine_mode
10245 pa_c_mode_for_suffix (char suffix)
10247 if (HPUX_LONG_DOUBLE_LIBRARY)
10256 /* Target hook for function_section. */
10259 pa_function_section (tree decl, enum node_frequency freq,
10260 bool startup, bool exit)
10262 /* Put functions in text section if target doesn't have named sections. */
10263 if (!targetm.have_named_sections)
10264 return text_section;
10266 /* Force nested functions into the same section as the containing
10269 && DECL_SECTION_NAME (decl) == NULL_TREE
10270 && DECL_CONTEXT (decl) != NULL_TREE
10271 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10272 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10273 return function_section (DECL_CONTEXT (decl));
10275 /* Otherwise, use the default function section. */
10276 return default_function_section (decl, freq, startup, exit);