1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
57 /* This is used for communication between ASM_OUTPUT_LABEL and
58 ASM_OUTPUT_LABELREF. */
59 int ia64_asm_output_label = 0;
61 /* Define the information needed to generate branch and scc insns. This is
62 stored from the compare operation. */
63 struct rtx_def * ia64_compare_op0;
64 struct rtx_def * ia64_compare_op1;
66 /* Register names for ia64_expand_prologue. */
67 static const char * const ia64_reg_numbers[96] =
68 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
69 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
70 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
71 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
72 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
73 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
74 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
75 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
76 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
77 "r104","r105","r106","r107","r108","r109","r110","r111",
78 "r112","r113","r114","r115","r116","r117","r118","r119",
79 "r120","r121","r122","r123","r124","r125","r126","r127"};
81 /* ??? These strings could be shared with REGISTER_NAMES. */
82 static const char * const ia64_input_reg_names[8] =
83 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_local_reg_names[80] =
87 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
88 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
89 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
90 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
91 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
92 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
93 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
94 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
95 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
96 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
98 /* ??? These strings could be shared with REGISTER_NAMES. */
99 static const char * const ia64_output_reg_names[8] =
100 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
102 /* Which cpu are we scheduling for. */
103 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
105 /* Determines whether we run our final scheduling pass or not. We always
106 avoid the normal second scheduling pass. */
107 static int ia64_flag_schedule_insns2;
109 /* Determines whether we run variable tracking in machine dependent reorg.  */
111 static int ia64_flag_var_tracking;
113 /* Variables which are this size or smaller are put in the sdata/sbss sections.  */
116 unsigned int ia64_section_threshold;
118 /* The following variable is used by the DFA insn scheduler. The value is
119 TRUE if we do insn bundling instead of insn scheduling. */
122 /* Structure to be filled in by ia64_compute_frame_size with register
123 save masks and offsets for the current function. */
125 struct ia64_frame_info
127 HOST_WIDE_INT total_size; /* size of the stack frame, not including
128 the caller's scratch area. */
129 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
130 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
131 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
132 HARD_REG_SET mask; /* mask of saved registers. */
133 unsigned int gr_used_mask; /* mask of registers in use as gr spill
134 registers or long-term scratches. */
135 int n_spilled; /* number of spilled registers. */
136 int reg_fp; /* register for fp. */
137 int reg_save_b0; /* save register for b0. */
138 int reg_save_pr; /* save register for prs. */
139 int reg_save_ar_pfs; /* save register for ar.pfs. */
140 int reg_save_ar_unat; /* save register for ar.unat. */
141 int reg_save_ar_lc; /* save register for ar.lc. */
142 int reg_save_gp; /* save register for gp. */
143 int n_input_regs; /* number of input registers used. */
144 int n_local_regs; /* number of local registers used. */
145 int n_output_regs; /* number of output registers used. */
146 int n_rotate_regs; /* number of rotating registers used. */
148 char need_regstk; /* true if a .regstk directive needed. */
149 char initialized; /* true if the data is finalized. */
152 /* Current frame information calculated by ia64_compute_frame_size. */
153 static struct ia64_frame_info current_frame_info;
155 static int ia64_first_cycle_multipass_dfa_lookahead (void);
156 static void ia64_dependencies_evaluation_hook (rtx, rtx);
157 static void ia64_init_dfa_pre_cycle_insn (void);
158 static rtx ia64_dfa_pre_cycle_insn (void);
159 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
160 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
161 static rtx gen_tls_get_addr (void);
162 static rtx gen_thread_pointer (void);
163 static int find_gr_spill (int);
164 static int next_scratch_gr_reg (void);
165 static void mark_reg_gr_used_mask (rtx, void *);
166 static void ia64_compute_frame_size (HOST_WIDE_INT);
167 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
168 static void finish_spill_pointers (void);
169 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
170 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
171 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
172 static rtx gen_movdi_x (rtx, rtx, rtx);
173 static rtx gen_fr_spill_x (rtx, rtx, rtx);
174 static rtx gen_fr_restore_x (rtx, rtx, rtx);
176 static enum machine_mode hfa_element_mode (tree, bool);
177 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
179 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
181 static bool ia64_function_ok_for_sibcall (tree, tree);
182 static bool ia64_return_in_memory (tree, tree);
183 static bool ia64_rtx_costs (rtx, int, int, int *);
184 static void fix_range (const char *);
185 static bool ia64_handle_option (size_t, const char *, int);
186 static struct machine_function * ia64_init_machine_status (void);
187 static void emit_insn_group_barriers (FILE *);
188 static void emit_all_insn_group_barriers (FILE *);
189 static void final_emit_insn_group_barriers (FILE *);
190 static void emit_predicate_relation_info (void);
191 static void ia64_reorg (void);
192 static bool ia64_in_small_data_p (tree);
193 static void process_epilogue (void);
194 static int process_set (FILE *, rtx);
196 static bool ia64_assemble_integer (rtx, unsigned int, int);
197 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
198 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
199 static void ia64_output_function_end_prologue (FILE *);
201 static int ia64_issue_rate (void);
202 static int ia64_adjust_cost (rtx, rtx, rtx, int);
203 static void ia64_sched_init (FILE *, int, int);
204 static void ia64_sched_finish (FILE *, int);
205 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
206 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
207 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
208 static int ia64_variable_issue (FILE *, int, rtx, int);
210 static struct bundle_state *get_free_bundle_state (void);
211 static void free_bundle_state (struct bundle_state *);
212 static void initiate_bundle_states (void);
213 static void finish_bundle_states (void);
214 static unsigned bundle_state_hash (const void *);
215 static int bundle_state_eq_p (const void *, const void *);
216 static int insert_bundle_state (struct bundle_state *);
217 static void initiate_bundle_state_table (void);
218 static void finish_bundle_state_table (void);
219 static int try_issue_nops (struct bundle_state *, int);
220 static int try_issue_insn (struct bundle_state *, rtx);
221 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
222 static int get_max_pos (state_t);
223 static int get_template (state_t, int);
225 static rtx get_next_important_insn (rtx, rtx);
226 static void bundling (FILE *, int, rtx, rtx);
228 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
229 HOST_WIDE_INT, tree);
230 static void ia64_file_start (void);
232 static void ia64_select_rtx_section (enum machine_mode, rtx,
233 unsigned HOST_WIDE_INT);
234 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
236 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
238 static void ia64_rwreloc_unique_section (tree, int)
240 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
241 unsigned HOST_WIDE_INT)
243 static unsigned int ia64_section_type_flags (tree, const char *, int);
244 static void ia64_hpux_add_extern_decl (tree decl)
246 static void ia64_hpux_file_end (void)
248 static void ia64_init_libfuncs (void)
250 static void ia64_hpux_init_libfuncs (void)
252 static void ia64_sysv4_init_libfuncs (void)
254 static void ia64_vms_init_libfuncs (void)
257 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
258 static void ia64_encode_section_info (tree, rtx, int);
259 static rtx ia64_struct_value_rtx (tree, int);
260 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
261 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
262 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
263 static bool ia64_cannot_force_const_mem (rtx);
264 static const char *ia64_mangle_fundamental_type (tree);
265 static const char *ia64_invalid_conversion (tree, tree);
266 static const char *ia64_invalid_unary_op (int, tree);
267 static const char *ia64_invalid_binary_op (int, tree, tree);
269 /* Table of valid machine attributes. */
270 static const struct attribute_spec ia64_attribute_table[] =
272 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
273 { "syscall_linkage", 0, 0, false, true, true, NULL },
274 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
275 { NULL, 0, 0, false, false, false, NULL }
278 /* Initialize the GCC target structure. */
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
282 #undef TARGET_INIT_BUILTINS
283 #define TARGET_INIT_BUILTINS ia64_init_builtins
285 #undef TARGET_EXPAND_BUILTIN
286 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
288 #undef TARGET_ASM_BYTE_OP
289 #define TARGET_ASM_BYTE_OP "\tdata1\t"
290 #undef TARGET_ASM_ALIGNED_HI_OP
291 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
292 #undef TARGET_ASM_ALIGNED_SI_OP
293 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
294 #undef TARGET_ASM_ALIGNED_DI_OP
295 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
296 #undef TARGET_ASM_UNALIGNED_HI_OP
297 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
298 #undef TARGET_ASM_UNALIGNED_SI_OP
299 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
300 #undef TARGET_ASM_UNALIGNED_DI_OP
301 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
302 #undef TARGET_ASM_INTEGER
303 #define TARGET_ASM_INTEGER ia64_assemble_integer
305 #undef TARGET_ASM_FUNCTION_PROLOGUE
306 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
307 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
308 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
309 #undef TARGET_ASM_FUNCTION_EPILOGUE
310 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
312 #undef TARGET_IN_SMALL_DATA_P
313 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
319 #undef TARGET_SCHED_VARIABLE_ISSUE
320 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
321 #undef TARGET_SCHED_INIT
322 #define TARGET_SCHED_INIT ia64_sched_init
323 #undef TARGET_SCHED_FINISH
324 #define TARGET_SCHED_FINISH ia64_sched_finish
325 #undef TARGET_SCHED_REORDER
326 #define TARGET_SCHED_REORDER ia64_sched_reorder
327 #undef TARGET_SCHED_REORDER2
328 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
330 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
331 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
333 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
334 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
336 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
337 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
338 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
339 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
341 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
342 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
343 ia64_first_cycle_multipass_dfa_lookahead_guard
345 #undef TARGET_SCHED_DFA_NEW_CYCLE
346 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
348 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
349 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
350 #undef TARGET_ARG_PARTIAL_BYTES
351 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
353 #undef TARGET_ASM_OUTPUT_MI_THUNK
354 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
355 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
356 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
358 #undef TARGET_ASM_FILE_START
359 #define TARGET_ASM_FILE_START ia64_file_start
361 #undef TARGET_RTX_COSTS
362 #define TARGET_RTX_COSTS ia64_rtx_costs
363 #undef TARGET_ADDRESS_COST
364 #define TARGET_ADDRESS_COST hook_int_rtx_0
366 #undef TARGET_MACHINE_DEPENDENT_REORG
367 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
369 #undef TARGET_ENCODE_SECTION_INFO
370 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
372 #undef TARGET_SECTION_TYPE_FLAGS
373 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
376 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
377 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
380 /* ??? ABI doesn't allow us to define this. */
382 #undef TARGET_PROMOTE_FUNCTION_ARGS
383 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
386 /* ??? ABI doesn't allow us to define this. */
388 #undef TARGET_PROMOTE_FUNCTION_RETURN
389 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
392 /* ??? Investigate. */
394 #undef TARGET_PROMOTE_PROTOTYPES
395 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
398 #undef TARGET_STRUCT_VALUE_RTX
399 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
400 #undef TARGET_RETURN_IN_MEMORY
401 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
402 #undef TARGET_SETUP_INCOMING_VARARGS
403 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
404 #undef TARGET_STRICT_ARGUMENT_NAMING
405 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
406 #undef TARGET_MUST_PASS_IN_STACK
407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
409 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
410 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
412 #undef TARGET_UNWIND_EMIT
413 #define TARGET_UNWIND_EMIT process_for_unwind_directive
415 #undef TARGET_SCALAR_MODE_SUPPORTED_P
416 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
417 #undef TARGET_VECTOR_MODE_SUPPORTED_P
418 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
420 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
421 in an order different from the specified program order. */
422 #undef TARGET_RELAXED_ORDERING
423 #define TARGET_RELAXED_ORDERING true
425 #undef TARGET_DEFAULT_TARGET_FLAGS
426 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
427 #undef TARGET_HANDLE_OPTION
428 #define TARGET_HANDLE_OPTION ia64_handle_option
430 #undef TARGET_CANNOT_FORCE_CONST_MEM
431 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
433 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
434 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
436 #undef TARGET_INVALID_CONVERSION
437 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
438 #undef TARGET_INVALID_UNARY_OP
439 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
440 #undef TARGET_INVALID_BINARY_OP
441 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
443 struct gcc_target targetm = TARGET_INITIALIZER;
447 ADDR_AREA_NORMAL, /* normal address area */
448 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
452 static GTY(()) tree small_ident1;
453 static GTY(()) tree small_ident2;
458 if (small_ident1 == 0)
460 small_ident1 = get_identifier ("small");
461 small_ident2 = get_identifier ("__small__");
465 /* Retrieve the address area that has been chosen for the given decl. */
467 static ia64_addr_area
468 ia64_get_addr_area (tree decl)
472 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
478 id = TREE_VALUE (TREE_VALUE (model_attr));
479 if (id == small_ident1 || id == small_ident2)
480 return ADDR_AREA_SMALL;
482 return ADDR_AREA_NORMAL;
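/* Illustrative example (not from the original source): a declaration such as

     static int counter __attribute__ ((model ("small")));

   attaches the "model" attribute handled below, so ia64_get_addr_area
   returns ADDR_AREA_SMALL for it and its symbol can later be flagged as
   addressable with a 22-bit "addl" immediate.  */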
486 ia64_handle_model_attribute (tree *node, tree name, tree args,
487 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
489 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
491 tree arg, decl = *node;
494 arg = TREE_VALUE (args);
495 if (arg == small_ident1 || arg == small_ident2)
497 addr_area = ADDR_AREA_SMALL;
501 warning (OPT_Wattributes, "invalid argument of %qs attribute",
502 IDENTIFIER_POINTER (name));
503 *no_add_attrs = true;
506 switch (TREE_CODE (decl))
509 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
511 && !TREE_STATIC (decl))
513 error ("%Jan address area attribute cannot be specified for "
514 "local variables", decl);
515 *no_add_attrs = true;
517 area = ia64_get_addr_area (decl);
518 if (area != ADDR_AREA_NORMAL && addr_area != area)
520 error ("address area of %q+D conflicts with previous "
521 "declaration", decl);
522 *no_add_attrs = true;
527 error ("%Jaddress area attribute cannot be specified for functions",
529 *no_add_attrs = true;
533 warning (OPT_Wattributes, "%qs attribute ignored",
534 IDENTIFIER_POINTER (name));
535 *no_add_attrs = true;
543 ia64_encode_addr_area (tree decl, rtx symbol)
547 flags = SYMBOL_REF_FLAGS (symbol);
548 switch (ia64_get_addr_area (decl))
550 case ADDR_AREA_NORMAL: break;
551 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
552 default: gcc_unreachable ();
554 SYMBOL_REF_FLAGS (symbol) = flags;
558 ia64_encode_section_info (tree decl, rtx rtl, int first)
560 default_encode_section_info (decl, rtl, first);
562 /* Careful not to prod global register variables. */
563 if (TREE_CODE (decl) == VAR_DECL
564 && GET_CODE (DECL_RTL (decl)) == MEM
565 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
566 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
567 ia64_encode_addr_area (decl, XEXP (rtl, 0));
570 /* Implement CONST_OK_FOR_LETTER_P. */
573 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
578 return CONST_OK_FOR_I (value);
580 return CONST_OK_FOR_J (value);
582 return CONST_OK_FOR_K (value);
584 return CONST_OK_FOR_L (value);
586 return CONST_OK_FOR_M (value);
588 return CONST_OK_FOR_N (value);
590 return CONST_OK_FOR_O (value);
592 return CONST_OK_FOR_P (value);
598 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
601 ia64_const_double_ok_for_letter_p (rtx value, char c)
606 return CONST_DOUBLE_OK_FOR_G (value);
612 /* Implement EXTRA_CONSTRAINT. */
615 ia64_extra_constraint (rtx value, char c)
620 /* Non-volatile memory for FP_REG loads/stores. */
621 return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
624 /* 1..4 for shladd arguments. */
625 return (GET_CODE (value) == CONST_INT
626 && INTVAL (value) >= 1 && INTVAL (value) <= 4);
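/* Illustrative note (not from the original source): shladd computes
   r1 = (r2 << count) + r3 and only encodes count values 1..4, which is
   why the constant is range-checked above.  */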
629 /* Non-post-inc memory for asms and other unsavory creatures. */
630 return (GET_CODE (value) == MEM
631 && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
632 && (reload_in_progress || memory_operand (value, VOIDmode)));
635 /* Symbol ref to small-address-area. */
636 return small_addr_symbolic_operand (value, VOIDmode);
640 return value == CONST0_RTX (GET_MODE (value));
643 /* An integer vector, such that conversion to an integer yields a
644 value appropriate for an integer 'J' constraint. */
645 if (GET_CODE (value) == CONST_VECTOR
646 && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
648 value = simplify_subreg (DImode, value, GET_MODE (value), 0);
649 return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
654 /* A V2SF vector containing elements that satisfy 'G'. */
656 (GET_CODE (value) == CONST_VECTOR
657 && GET_MODE (value) == V2SFmode
658 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
659 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
666 /* Return 1 if the operands of a move are ok. */
669 ia64_move_ok (rtx dst, rtx src)
671 /* If we're under init_recog_no_volatile, we'll not be able to use
672 memory_operand. So check the code directly and don't worry about
673 the validity of the underlying address, which should have been
674 checked elsewhere anyway. */
675 if (GET_CODE (dst) != MEM)
677 if (GET_CODE (src) == MEM)
679 if (register_operand (src, VOIDmode))
682 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0.  */
683 if (INTEGRAL_MODE_P (GET_MODE (dst)))
684 return src == const0_rtx;
686 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
689 /* Return 1 if the operands are ok for a floating point load pair. */
692 ia64_load_pair_ok (rtx dst, rtx src)
694 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
696 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
698 switch (GET_CODE (XEXP (src, 0)))
707 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
709 if (GET_CODE (adjust) != CONST_INT
710 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
721 addp4_optimize_ok (rtx op1, rtx op2)
723 return (basereg_operand (op1, GET_MODE(op1)) !=
724 basereg_operand (op2, GET_MODE(op2)));
727 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
728 Return the length of the field, or <= 0 on failure. */
731 ia64_depz_field_mask (rtx rop, rtx rshift)
733 unsigned HOST_WIDE_INT op = INTVAL (rop);
734 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
736 /* Get rid of the zero bits we're shifting in. */
739 /* We must now have a solid block of 1's at bit 0. */
740 return exact_log2 (op + 1);
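/* Worked example (illustrative, not from the original source): for
   ROP = 0xff00 and RSHIFT = 8, the shift leaves OP = 0xff and
   exact_log2 (0xff + 1) == 8, i.e. an 8-bit field for dep.z.  A mask such
   as 0xf0f00 fails, because 0xf0f + 1 is not a power of two and
   exact_log2 then returns -1.  */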
743 /* Return the TLS model to use for ADDR. */
745 static enum tls_model
746 tls_symbolic_operand_type (rtx addr)
748 enum tls_model tls_kind = 0;
750 if (GET_CODE (addr) == CONST)
752 if (GET_CODE (XEXP (addr, 0)) == PLUS
753 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
754 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
756 else if (GET_CODE (addr) == SYMBOL_REF)
757 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
762 /* Return true if X is a constant that is valid for some immediate
763 field in an instruction. */
766 ia64_legitimate_constant_p (rtx x)
768 switch (GET_CODE (x))
775 if (GET_MODE (x) == VOIDmode)
777 return CONST_DOUBLE_OK_FOR_G (x);
781 return tls_symbolic_operand_type (x) == 0;
785 enum machine_mode mode = GET_MODE (x);
787 if (mode == V2SFmode)
788 return ia64_extra_constraint (x, 'Y');
790 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
791 && GET_MODE_SIZE (mode) <= 8);
799 /* Don't allow TLS addresses to get spilled to memory. */
802 ia64_cannot_force_const_mem (rtx x)
804 return tls_symbolic_operand_type (x) != 0;
807 /* Expand a symbolic constant load. */
810 ia64_expand_load_address (rtx dest, rtx src)
812 gcc_assert (GET_CODE (dest) == REG);
814 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
815 having to pointer-extend the value afterward. Other forms of address
816 computation below are also more natural to compute as 64-bit quantities.
817 If we've been given an SImode destination register, change it. */
818 if (GET_MODE (dest) != Pmode)
819 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
823 if (small_addr_symbolic_operand (src, VOIDmode))
827 emit_insn (gen_load_gprel64 (dest, src));
828 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
829 emit_insn (gen_load_fptr (dest, src));
830 else if (sdata_symbolic_operand (src, VOIDmode))
831 emit_insn (gen_load_gprel (dest, src));
834 HOST_WIDE_INT addend = 0;
837 /* We did split constant offsets in ia64_expand_move, and we did try
838 to keep them split in move_operand, but we also allowed reload to
839 rematerialize arbitrary constants rather than spill the value to
840 the stack and reload it. So we have to be prepared here to split
842 if (GET_CODE (src) == CONST)
844 HOST_WIDE_INT hi, lo;
846 hi = INTVAL (XEXP (XEXP (src, 0), 1));
847 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
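/* Worked example (illustrative, not from the original source): for
   hi = 0x2100 the line above gives
     lo = ((0x2100 & 0x3fff) ^ 0x2000) - 0x2000 = 0x100 - 0x2000 = -0x1f00,
   the sign-extended low 14 bits, and the code then keeps hi = 0x4000, so
   hi + lo still equals the original offset while lo fits in a signed
   14-bit immediate.  */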
853 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
857 tmp = gen_rtx_HIGH (Pmode, src);
858 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
859 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
861 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
862 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
866 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
867 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
874 static GTY(()) rtx gen_tls_tga;
876 gen_tls_get_addr (void)
879 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
883 static GTY(()) rtx thread_pointer_rtx;
885 gen_thread_pointer (void)
887 if (!thread_pointer_rtx)
888 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
889 return thread_pointer_rtx;
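/* Illustrative note (not from the original source): register 13 is the
   ia64 ABI's thread pointer (tp), so the TLS addresses formed below are
   tp plus a tprel offset.  */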
893 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
894 rtx orig_op1, HOST_WIDE_INT addend)
896 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
898 HOST_WIDE_INT addend_lo, addend_hi;
902 case TLS_MODEL_GLOBAL_DYNAMIC:
905 tga_op1 = gen_reg_rtx (Pmode);
906 emit_insn (gen_load_dtpmod (tga_op1, op1));
908 tga_op2 = gen_reg_rtx (Pmode);
909 emit_insn (gen_load_dtprel (tga_op2, op1));
911 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
912 LCT_CONST, Pmode, 2, tga_op1,
913 Pmode, tga_op2, Pmode);
915 insns = get_insns ();
918 if (GET_MODE (op0) != Pmode)
920 emit_libcall_block (insns, op0, tga_ret, op1);
923 case TLS_MODEL_LOCAL_DYNAMIC:
924 /* ??? This isn't the completely proper way to do local-dynamic.
925 If the call to __tls_get_addr is used only by a single symbol,
926 then we should (somehow) move the dtprel to the second arg
927 to avoid the extra add. */
930 tga_op1 = gen_reg_rtx (Pmode);
931 emit_insn (gen_load_dtpmod (tga_op1, op1));
933 tga_op2 = const0_rtx;
935 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
936 LCT_CONST, Pmode, 2, tga_op1,
937 Pmode, tga_op2, Pmode);
939 insns = get_insns ();
942 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
944 tmp = gen_reg_rtx (Pmode);
945 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
947 if (!register_operand (op0, Pmode))
948 op0 = gen_reg_rtx (Pmode);
951 emit_insn (gen_load_dtprel (op0, op1));
952 emit_insn (gen_adddi3 (op0, tmp, op0));
955 emit_insn (gen_add_dtprel (op0, op1, tmp));
958 case TLS_MODEL_INITIAL_EXEC:
959 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
960 addend_hi = addend - addend_lo;
962 op1 = plus_constant (op1, addend_hi);
965 tmp = gen_reg_rtx (Pmode);
966 emit_insn (gen_load_tprel (tmp, op1));
968 if (!register_operand (op0, Pmode))
969 op0 = gen_reg_rtx (Pmode);
970 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
973 case TLS_MODEL_LOCAL_EXEC:
974 if (!register_operand (op0, Pmode))
975 op0 = gen_reg_rtx (Pmode);
981 emit_insn (gen_load_tprel (op0, op1));
982 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
985 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
993 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
994 orig_op0, 1, OPTAB_DIRECT);
997 if (GET_MODE (orig_op0) == Pmode)
999 return gen_lowpart (GET_MODE (orig_op0), op0);
1003 ia64_expand_move (rtx op0, rtx op1)
1005 enum machine_mode mode = GET_MODE (op0);
1007 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1008 op1 = force_reg (mode, op1);
1010 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1012 HOST_WIDE_INT addend = 0;
1013 enum tls_model tls_kind;
1016 if (GET_CODE (op1) == CONST
1017 && GET_CODE (XEXP (op1, 0)) == PLUS
1018 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1020 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1021 sym = XEXP (XEXP (op1, 0), 0);
1024 tls_kind = tls_symbolic_operand_type (sym);
1026 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1028 if (any_offset_symbol_operand (sym, mode))
1030 else if (aligned_offset_symbol_operand (sym, mode))
1032 HOST_WIDE_INT addend_lo, addend_hi;
1034 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1035 addend_hi = addend - addend_lo;
1039 op1 = plus_constant (sym, addend_hi);
1048 if (reload_completed)
1050 /* We really should have taken care of this offset earlier. */
1051 gcc_assert (addend == 0);
1052 if (ia64_expand_load_address (op0, op1))
1058 rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1060 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1062 op1 = expand_simple_binop (mode, PLUS, subtarget,
1063 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1072 /* Split a move from OP1 to OP0 conditional on COND. */
1075 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1077 rtx insn, first = get_last_insn ();
1079 emit_move_insn (op0, op1);
1081 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1083 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1087 /* Split a post-reload TImode or TFmode reference into two DImode
1088 components. This is made extra difficult by the fact that we do
1089 not get any scratch registers to work with, because reload cannot
1090 be prevented from giving us a scratch that overlaps the register
1091 pair involved. So instead, when addressing memory, we tweak the
1092 pointer register up and back down with POST_INCs. Or up and not
1093 back down when we can get away with it.
1095 REVERSED is true when the loads must be done in reversed order
1096 (high word first) for correctness. DEAD is true when the pointer
1097 dies with the second insn we generate and therefore the second
1098 address must not carry a postmodify.
1100 May return an insn which is to be emitted after the moves. */
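/* Illustrative example (not from the original source): a TImode load from
   (mem:TI (reg base)) with !REVERSED is split into

     out[0] = (mem:DI (post_inc base))   -- word at offset 0, then base += 8
     out[1] = (mem:DI (post_dec base))   -- word at offset 8, then base -= 8

   and when DEAD is true the second reference is left as a plain
   (mem:DI base) so that no postmodify survives on the dying pointer.  */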
1103 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1107 switch (GET_CODE (in))
1110 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1111 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1116 /* Cannot occur reversed. */
1117 gcc_assert (!reversed);
1119 if (GET_MODE (in) != TFmode)
1120 split_double (in, &out[0], &out[1]);
1122 /* split_double does not understand how to split a TFmode
1123 quantity into a pair of DImode constants. */
1126 unsigned HOST_WIDE_INT p[2];
1127 long l[4]; /* TFmode is 128 bits */
1129 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1130 real_to_target (l, &r, TFmode);
1132 if (FLOAT_WORDS_BIG_ENDIAN)
1134 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1135 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1139 p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1140 p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1142 out[0] = GEN_INT (p[0]);
1143 out[1] = GEN_INT (p[1]);
1149 rtx base = XEXP (in, 0);
1152 switch (GET_CODE (base))
1157 out[0] = adjust_automodify_address
1158 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1159 out[1] = adjust_automodify_address
1160 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1164 /* Reversal requires a pre-increment, which can only
1165 be done as a separate insn. */
1166 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1167 out[0] = adjust_automodify_address
1168 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1169 out[1] = adjust_address (in, DImode, 0);
1174 gcc_assert (!reversed && !dead);
1176 /* Just do the increment in two steps. */
1177 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1178 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1182 gcc_assert (!reversed && !dead);
1184 /* Add 8, subtract 24. */
1185 base = XEXP (base, 0);
1186 out[0] = adjust_automodify_address
1187 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1188 out[1] = adjust_automodify_address
1190 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1195 gcc_assert (!reversed && !dead);
1197 /* Extract and adjust the modification. This case is
1198 trickier than the others, because we might have an
1199 index register, or we might have a combined offset that
1200 doesn't fit a signed 9-bit displacement field. We can
1201 assume the incoming expression is already legitimate. */
1202 offset = XEXP (base, 1);
1203 base = XEXP (base, 0);
1205 out[0] = adjust_automodify_address
1206 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1208 if (GET_CODE (XEXP (offset, 1)) == REG)
1210 /* Can't adjust the postmodify to match. Emit the
1211 original, then a separate addition insn. */
1212 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1213 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1217 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1218 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1220 /* Again the postmodify cannot be made to match,
1221 but in this case it's more efficient to get rid
1222 of the postmodify entirely and fix up with an
1224 out[1] = adjust_automodify_address (in, DImode, base, 8);
1226 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1230 /* Combined offset still fits in the displacement field.
1231 (We cannot overflow it at the high end.) */
1232 out[1] = adjust_automodify_address
1233 (in, DImode, gen_rtx_POST_MODIFY
1234 (Pmode, base, gen_rtx_PLUS
1236 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1255 /* Split a TImode or TFmode move instruction after reload.
1256 This is used by *movtf_internal and *movti_internal. */
1258 ia64_split_tmode_move (rtx operands[])
1260 rtx in[2], out[2], insn;
1263 bool reversed = false;
1265 /* It is possible for reload to decide to overwrite a pointer with
1266 the value it points to. In that case we have to do the loads in
1267 the appropriate order so that the pointer is not destroyed too
1268 early. Also we must not generate a postmodify for that second
1269 load, or rws_access_regno will die. */
1270 if (GET_CODE (operands[1]) == MEM
1271 && reg_overlap_mentioned_p (operands[0], operands[1]))
1273 rtx base = XEXP (operands[1], 0);
1274 while (GET_CODE (base) != REG)
1275 base = XEXP (base, 0);
1277 if (REGNO (base) == REGNO (operands[0]))
1281 /* Another reason to do the moves in reversed order is if the first
1282 element of the target register pair is also the second element of
1283 the source register pair. */
1284 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1285 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1288 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1289 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1291 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1292 if (GET_CODE (EXP) == MEM \
1293 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1294 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1295 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1296 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1297 XEXP (XEXP (EXP, 0), 0), \
1300 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1301 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1302 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1304 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1305 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1306 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1309 emit_insn (fixup[0]);
1311 emit_insn (fixup[1]);
1313 #undef MAYBE_ADD_REG_INC_NOTE
1316 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1317 through memory plus an extra GR scratch register. Except that you can
1318 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1319 SECONDARY_RELOAD_CLASS, but not both.
1321 We got into problems in the first place by allowing a construct like
1322 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1323 This solution attempts to prevent this situation from occurring. When
1324 we see something like the above, we spill the inner register to memory. */
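/* Illustrative example (not from the original source): given
   (subreg:XF (reg:TI 100) 0), the code below allocates a 16-byte stack
   temporary, stores the TImode register into it, and hands back the slot
   re-typed as an XFmode (or RFmode) MEM, so the XFmode access never
   touches the TImode register directly.  */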
1327 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1329 if (GET_CODE (in) == SUBREG
1330 && GET_MODE (SUBREG_REG (in)) == TImode
1331 && GET_CODE (SUBREG_REG (in)) == REG)
1333 rtx memt = assign_stack_temp (TImode, 16, 0);
1334 emit_move_insn (memt, SUBREG_REG (in));
1335 return adjust_address (memt, mode, 0);
1337 else if (force && GET_CODE (in) == REG)
1339 rtx memx = assign_stack_temp (mode, 16, 0);
1340 emit_move_insn (memx, in);
1347 /* Expand the movxf or movrf pattern (MODE says which) with the given
1348 OPERANDS, returning true if the pattern should then invoke
1352 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1354 rtx op0 = operands[0];
1356 if (GET_CODE (op0) == SUBREG)
1357 op0 = SUBREG_REG (op0);
1359 /* We must support XFmode loads into general registers for stdarg/vararg,
1360 unprototyped calls, and a rare case where a long double is passed as
1361 an argument after a float HFA fills the FP registers. We split them into
1362 DImode loads for convenience. We also need to support XFmode stores
1363 for the last case. This case does not happen for stdarg/vararg routines,
1364 because we do a block store to memory of unnamed arguments. */
1366 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1370 /* We're hoping to transform everything that deals with XFmode
1371 quantities and GR registers early in the compiler. */
1372 gcc_assert (!no_new_pseudos);
1374 /* Struct to register can just use TImode instead. */
1375 if ((GET_CODE (operands[1]) == SUBREG
1376 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1377 || (GET_CODE (operands[1]) == REG
1378 && GR_REGNO_P (REGNO (operands[1]))))
1380 rtx op1 = operands[1];
1382 if (GET_CODE (op1) == SUBREG)
1383 op1 = SUBREG_REG (op1);
1385 op1 = gen_rtx_REG (TImode, REGNO (op1));
1387 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1391 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1393 /* Don't word-swap when reading in the constant. */
1394 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1395 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1397 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1398 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1403 /* If the quantity is in a register not known to be GR, spill it. */
1404 if (register_operand (operands[1], mode))
1405 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1407 gcc_assert (GET_CODE (operands[1]) == MEM);
1409 /* Don't word-swap when reading in the value. */
1410 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1411 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1413 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1414 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1418 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1420 /* We're hoping to transform everything that deals with XFmode
1421 quantities and GR registers early in the compiler. */
1422 gcc_assert (!no_new_pseudos);
1424 /* Op0 can't be a GR_REG here, as that case is handled above.
1425 If op0 is a register, then we spill op1, so that we now have a
1426 MEM operand. This requires creating an XFmode subreg of a TImode reg
1427 to force the spill. */
1428 if (register_operand (operands[0], mode))
1430 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1431 op1 = gen_rtx_SUBREG (mode, op1, 0);
1432 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1439 gcc_assert (GET_CODE (operands[0]) == MEM);
1441 /* Don't word-swap when writing out the value. */
1442 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1443 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1445 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1446 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1451 if (!reload_in_progress && !reload_completed)
1453 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1455 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1457 rtx memt, memx, in = operands[1];
1458 if (CONSTANT_P (in))
1459 in = validize_mem (force_const_mem (mode, in));
1460 if (GET_CODE (in) == MEM)
1461 memt = adjust_address (in, TImode, 0);
1464 memt = assign_stack_temp (TImode, 16, 0);
1465 memx = adjust_address (memt, mode, 0);
1466 emit_move_insn (memx, in);
1468 emit_move_insn (op0, memt);
1472 if (!ia64_move_ok (operands[0], operands[1]))
1473 operands[1] = force_reg (mode, operands[1]);
1479 /* Emit comparison instruction if necessary, returning the expression
1480 that holds the compare result in the proper mode. */
1482 static GTY(()) rtx cmptf_libfunc;
1485 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1487 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1490 /* If we have a BImode input, then we already have a compare result, and
1491 do not need to emit another comparison. */
1492 if (GET_MODE (op0) == BImode)
1494 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1497 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1498 magic number as its third argument, that indicates what to do.
1499 The return value is an integer to be compared against zero. */
1500 else if (GET_MODE (op0) == TFmode)
1503 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1509 enum rtx_code ncode;
1512 gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1515 /* 1 = equal, 0 = not equal. Equality operators do
1516 not raise FP_INVALID when given an SNaN operand. */
1517 case EQ: magic = QCMP_EQ; ncode = NE; break;
1518 case NE: magic = QCMP_EQ; ncode = EQ; break;
1519 /* isunordered() from C99. */
1520 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1521 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1522 /* Relational operators raise FP_INVALID when given
1524 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1525 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1526 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1527 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1528 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1529 Expanders for buneq etc. would have to be added to ia64.md
1530 for this to be useful. */
1531 default: gcc_unreachable ();
1536 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1537 op0, TFmode, op1, TFmode,
1538 GEN_INT (magic), DImode);
1539 cmp = gen_reg_rtx (BImode);
1540 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1541 gen_rtx_fmt_ee (ncode, BImode,
1544 insns = get_insns ();
1547 emit_libcall_block (insns, cmp, cmp,
1548 gen_rtx_fmt_ee (code, BImode, op0, op1));
1553 cmp = gen_reg_rtx (BImode);
1554 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1555 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1559 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1562 /* Generate an integral vector comparison. Return true if the condition has
1563 been reversed, and so the sense of the comparison should be inverted. */
1566 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1567 rtx dest, rtx op0, rtx op1)
1569 bool negate = false;
1572 /* Canonicalize the comparison to EQ, GT, GTU. */
1583 code = reverse_condition (code);
1589 code = reverse_condition (code);
1595 code = swap_condition (code);
1596 x = op0, op0 = op1, op1 = x;
1603 /* Unsigned parallel compare is not supported by the hardware. Play some
1604 tricks to turn this into a signed comparison against 0. */
1613 /* Perform a parallel modulo subtraction. */
1614 t1 = gen_reg_rtx (V2SImode);
1615 emit_insn (gen_subv2si3 (t1, op0, op1));
1617 /* Extract the original sign bit of op0. */
1618 mask = GEN_INT (-0x80000000);
1619 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1620 mask = force_reg (V2SImode, mask);
1621 t2 = gen_reg_rtx (V2SImode);
1622 emit_insn (gen_andv2si3 (t2, op0, mask));
1624 /* XOR it back into the result of the subtraction. This results
1625 in the sign bit set iff we saw unsigned underflow. */
1626 x = gen_reg_rtx (V2SImode);
1627 emit_insn (gen_xorv2si3 (x, t1, t2));
1631 op1 = CONST0_RTX (mode);
1637 /* Perform a parallel unsigned saturating subtraction. */
1638 x = gen_reg_rtx (mode);
1639 emit_insn (gen_rtx_SET (VOIDmode, x,
1640 gen_rtx_US_MINUS (mode, op0, op1)));
1644 op1 = CONST0_RTX (mode);
1653 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1654 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1659 /* Emit an integral vector conditional move. */
1662 ia64_expand_vecint_cmov (rtx operands[])
1664 enum machine_mode mode = GET_MODE (operands[0]);
1665 enum rtx_code code = GET_CODE (operands[3]);
1669 cmp = gen_reg_rtx (mode);
1670 negate = ia64_expand_vecint_compare (code, mode, cmp,
1671 operands[4], operands[5]);
1673 ot = operands[1+negate];
1674 of = operands[2-negate];
1676 if (ot == CONST0_RTX (mode))
1678 if (of == CONST0_RTX (mode))
1680 emit_move_insn (operands[0], ot);
1684 x = gen_rtx_NOT (mode, cmp);
1685 x = gen_rtx_AND (mode, x, of);
1686 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1688 else if (of == CONST0_RTX (mode))
1690 x = gen_rtx_AND (mode, cmp, ot);
1691 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1697 t = gen_reg_rtx (mode);
1698 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1699 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1701 f = gen_reg_rtx (mode);
1702 x = gen_rtx_NOT (mode, cmp);
1703 x = gen_rtx_AND (mode, x, operands[2-negate]);
1704 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1706 x = gen_rtx_IOR (mode, t, f);
1707 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1711 /* Emit an integral vector min or max operation. Return true if all done. */
1714 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1719 /* These four combinations are supported directly. */
1720 if (mode == V8QImode && (code == UMIN || code == UMAX))
1722 if (mode == V4HImode && (code == SMIN || code == SMAX))
1725 /* This combination can be implemented with only saturating subtraction. */
1726 if (mode == V4HImode && code == UMAX)
1728 rtx x, tmp = gen_reg_rtx (mode);
1730 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1731 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1733 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
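/* Worked example (illustrative, not from the original source):
   umax (a, b) == (a -us b) + b in each 16-bit lane, e.g.
   a = 7, b = 5: (7 -us 5) + 5 = 7, and a = 3, b = 5: (3 -us 5) + 5 = 5.  */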
1737 /* Everything else implemented via vector comparisons. */
1738 xops[0] = operands[0];
1739 xops[4] = xops[1] = operands[1];
1740 xops[5] = xops[2] = operands[2];
1759 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1761 ia64_expand_vecint_cmov (xops);
1765 /* Emit an integral vector widening sum operation.  */
1768 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1771 enum machine_mode wmode, mode;
1772 rtx (*unpack_l) (rtx, rtx, rtx);
1773 rtx (*unpack_h) (rtx, rtx, rtx);
1774 rtx (*plus) (rtx, rtx, rtx);
1776 wmode = GET_MODE (operands[0]);
1777 mode = GET_MODE (operands[1]);
1782 unpack_l = gen_unpack1_l;
1783 unpack_h = gen_unpack1_h;
1784 plus = gen_addv4hi3;
1787 unpack_l = gen_unpack2_l;
1788 unpack_h = gen_unpack2_h;
1789 plus = gen_addv2si3;
1795 /* Fill in x with the sign extension of each element in op1. */
1797 x = CONST0_RTX (mode);
1802 x = gen_reg_rtx (mode);
1804 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1809 l = gen_reg_rtx (wmode);
1810 h = gen_reg_rtx (wmode);
1811 s = gen_reg_rtx (wmode);
1813 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1814 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1815 emit_insn (plus (s, l, operands[2]));
1816 emit_insn (plus (operands[0], h, s));
1819 /* Emit a signed or unsigned V8QI dot product operation. */
1822 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1824 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1826 /* Fill in x1 and x2 with the sign extension of each element. */
1828 x1 = x2 = CONST0_RTX (V8QImode);
1833 x1 = gen_reg_rtx (V8QImode);
1834 x2 = gen_reg_rtx (V8QImode);
1836 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1837 CONST0_RTX (V8QImode));
1839 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1840 CONST0_RTX (V8QImode));
1844 l1 = gen_reg_rtx (V4HImode);
1845 l2 = gen_reg_rtx (V4HImode);
1846 h1 = gen_reg_rtx (V4HImode);
1847 h2 = gen_reg_rtx (V4HImode);
1849 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1850 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1851 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1852 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1854 p1 = gen_reg_rtx (V2SImode);
1855 p2 = gen_reg_rtx (V2SImode);
1856 p3 = gen_reg_rtx (V2SImode);
1857 p4 = gen_reg_rtx (V2SImode);
1858 emit_insn (gen_pmpy2_r (p1, l1, l2));
1859 emit_insn (gen_pmpy2_l (p2, l1, l2));
1860 emit_insn (gen_pmpy2_r (p3, h1, h2));
1861 emit_insn (gen_pmpy2_l (p4, h1, h2));
1863 s1 = gen_reg_rtx (V2SImode);
1864 s2 = gen_reg_rtx (V2SImode);
1865 s3 = gen_reg_rtx (V2SImode);
1866 emit_insn (gen_addv2si3 (s1, p1, p2));
1867 emit_insn (gen_addv2si3 (s2, p3, p4));
1868 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1869 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1872 /* Emit the appropriate sequence for a call. */
1875 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1880 addr = XEXP (addr, 0);
1881 addr = convert_memory_address (DImode, addr);
1882 b0 = gen_rtx_REG (DImode, R_BR (0));
1884 /* ??? Should do this for functions known to bind local too. */
1885 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1888 insn = gen_sibcall_nogp (addr);
1890 insn = gen_call_nogp (addr, b0);
1892 insn = gen_call_value_nogp (retval, addr, b0);
1893 insn = emit_call_insn (insn);
1898 insn = gen_sibcall_gp (addr);
1900 insn = gen_call_gp (addr, b0);
1902 insn = gen_call_value_gp (retval, addr, b0);
1903 insn = emit_call_insn (insn);
1905 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1909 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1913 ia64_reload_gp (void)
1917 if (current_frame_info.reg_save_gp)
1918 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1921 HOST_WIDE_INT offset;
1923 offset = (current_frame_info.spill_cfa_off
1924 + current_frame_info.spill_size);
1925 if (frame_pointer_needed)
1927 tmp = hard_frame_pointer_rtx;
1932 tmp = stack_pointer_rtx;
1933 offset = current_frame_info.total_size - offset;
1936 if (CONST_OK_FOR_I (offset))
1937 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1938 tmp, GEN_INT (offset)));
1941 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1942 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1943 pic_offset_table_rtx, tmp));
1946 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1949 emit_move_insn (pic_offset_table_rtx, tmp);
1953 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1954 rtx scratch_b, int noreturn_p, int sibcall_p)
1957 bool is_desc = false;
1959 /* If we find we're calling through a register, then we're actually
1960 calling through a descriptor, so load up the values. */
1961 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1966 /* ??? We are currently constrained to *not* use peep2, because
1967 we can legitimately change the global lifetime of the GP
1968 (in the form of killing where previously live). This is
1969 because a call through a descriptor doesn't use the previous
1970 value of the GP, while a direct call does, and we do not
1971 commit to either form until the split here.
1973 That said, this means that we lack precise life info for
1974 whether ADDR is dead after this call. This is not terribly
1975 important, since we can fix things up essentially for free
1976 with the POST_DEC below, but it's nice to not use it when we
1977 can immediately tell it's not necessary. */
1978 addr_dead_p = ((noreturn_p || sibcall_p
1979 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1981 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1983 /* Load the code address into scratch_b. */
1984 tmp = gen_rtx_POST_INC (Pmode, addr);
1985 tmp = gen_rtx_MEM (Pmode, tmp);
1986 emit_move_insn (scratch_r, tmp);
1987 emit_move_insn (scratch_b, scratch_r);
1989 /* Load the GP address. If ADDR is not dead here, then we must
1990 revert the change made above via the POST_INCREMENT. */
1992 tmp = gen_rtx_POST_DEC (Pmode, addr);
1995 tmp = gen_rtx_MEM (Pmode, tmp);
1996 emit_move_insn (pic_offset_table_rtx, tmp);
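/* Illustrative note (not from the original source): an ia64 function
   descriptor is the 16-byte pair { entry address, gp value }, so the two
   loads above fetch the entry point through a POST_INC of ADDR and then
   the gp value, using a POST_DEC to restore ADDR when it is not dead.  */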
2003 insn = gen_sibcall_nogp (addr);
2005 insn = gen_call_value_nogp (retval, addr, retaddr);
2007 insn = gen_call_nogp (addr, retaddr);
2008 emit_call_insn (insn);
2010 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2014 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2016 This differs from the generic code in that we know about the zero-extending
2017 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2018 also know that ld.acq+cmpxchg.rel equals a full barrier.
2020 The loop we want to generate looks like
2025 new_reg = cmp_reg op val;
2026 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2027 if (cmp_reg != old_reg)
2030 Note that we only do the plain load from memory once. Subsequent
2031 iterations use the value loaded by the compare-and-swap pattern. */
2034 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2035 rtx old_dst, rtx new_dst)
2037 enum machine_mode mode = GET_MODE (mem);
2038 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2039 enum insn_code icode;
2041 /* Special case for using fetchadd. */
2042 if ((mode == SImode || mode == DImode)
2043 && (code == PLUS || code == MINUS)
2044 && fetchadd_operand (val, mode))
2047 val = GEN_INT (-INTVAL (val));
2050 old_dst = gen_reg_rtx (mode);
2052 emit_insn (gen_memory_barrier ());
2055 icode = CODE_FOR_fetchadd_acq_si;
2057 icode = CODE_FOR_fetchadd_acq_di;
2058 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2062 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2064 if (new_reg != new_dst)
2065 emit_move_insn (new_dst, new_reg);
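/* Illustrative note (not from the original source): the fetchadd path
   above only applies to SImode/DImode PLUS/MINUS with one of the
   increments the hardware accepts (-16, -8, -4, -1, 1, 4, 8, 16); e.g.
   an atomic "x += 8" on a DImode X becomes a fence plus a single
   fetchadd8.acq rather than the cmpxchg loop built below.  */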
2070 /* Because of the volatile mem read, we get an ld.acq, which is the
2071 front half of the full barrier. The end half is the cmpxchg.rel. */
2072 gcc_assert (MEM_VOLATILE_P (mem));
2074 old_reg = gen_reg_rtx (DImode);
2075 cmp_reg = gen_reg_rtx (DImode);
2076 label = gen_label_rtx ();
2080 val = simplify_gen_subreg (DImode, val, mode, 0);
2081 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2084 emit_move_insn (cmp_reg, mem);
2088 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2089 emit_move_insn (old_reg, cmp_reg);
2090 emit_move_insn (ar_ccv, cmp_reg);
2093 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2098 new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2101 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2102 true, OPTAB_DIRECT);
2105 new_reg = gen_lowpart (mode, new_reg);
2107 emit_move_insn (new_dst, new_reg);
2111 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2112 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2113 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2114 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2119 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2121 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2124 /* Begin the assembly file. */
2127 ia64_file_start (void)
2129 /* Variable tracking should be run after all optimizations which change order
2130 of insns. It also needs a valid CFG. This can't be done in
2131 ia64_override_options, because flag_var_tracking is finalized after that.  */
2133 ia64_flag_var_tracking = flag_var_tracking;
2134 flag_var_tracking = 0;
2136 default_file_start ();
2137 emit_safe_across_calls ();
2141 emit_safe_across_calls (void)
2143 unsigned int rs, re;
2150 while (rs < 64 && call_used_regs[PR_REG (rs)])
2154 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2158 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2162 fputc (',', asm_out_file);
2164 fprintf (asm_out_file, "p%u", rs);
2166 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2170 fputc ('\n', asm_out_file);
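/* For reference: with the usual IA-64 partition of the predicate file
   (p1-p5 and p16-p63 call-saved, p6-p15 scratch), the loop above ends up
   emitting a single directive along the lines of

	.pred.safe_across_calls p1-p5,p16-p63

   The exact ranges follow call_used_regs for the ABI in use, so treat the
   numbers here as an example only.  */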
2173 /* Helper function for ia64_compute_frame_size: find an appropriate general
2174 register to spill some special register to. SPECIAL_SPILL_MASK contains
2175 bits in GR0 to GR31 that have already been allocated by this routine.
2176 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2179 find_gr_spill (int try_locals)
2183 /* If this is a leaf function, first try an otherwise unused
2184 call-clobbered register. */
2185 if (current_function_is_leaf)
2187 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2188 if (! regs_ever_live[regno]
2189 && call_used_regs[regno]
2190 && ! fixed_regs[regno]
2191 && ! global_regs[regno]
2192 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2194 current_frame_info.gr_used_mask |= 1 << regno;
2201 regno = current_frame_info.n_local_regs;
2202 /* If there is a frame pointer, then we can't use loc79, because
2203 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2204 reg_name switching code in ia64_expand_prologue. */
2205 if (regno < (80 - frame_pointer_needed))
2207 current_frame_info.n_local_regs = regno + 1;
2208 return LOC_REG (0) + regno;
2212 /* Failed to find a general register to spill to. Must use stack. */
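/* Usage sketch, with made-up register choices: in a leaf function that
   happens to leave r14 untouched, find_gr_spill (1) hands back r14 and
   records it in current_frame_info.gr_used_mask; in a non-leaf function it
   instead bumps n_local_regs and returns the next loc register; a return
   value of 0 tells the caller to fall back to a stack slot.  */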
2216 /* In order to make for nice schedules, we try to allocate every temporary
2217 to a different register. We must of course stay away from call-saved,
2218 fixed, and global registers. We must also stay away from registers
2219 allocated in current_frame_info.gr_used_mask, since those include regs
2220 used all through the prologue.
2222 Any register allocated here must be used immediately. The idea is to
2223 aid scheduling, not to solve data flow problems. */
2225 static int last_scratch_gr_reg;
2228 next_scratch_gr_reg (void)
2232 for (i = 0; i < 32; ++i)
2234 regno = (last_scratch_gr_reg + i + 1) & 31;
2235 if (call_used_regs[regno]
2236 && ! fixed_regs[regno]
2237 && ! global_regs[regno]
2238 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2240 last_scratch_gr_reg = regno;
2245 /* There must be _something_ available. */
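/* Illustrative only: ia64_expand_prologue below resets last_scratch_gr_reg
   to 15, so successive calls hand out r16, r17, ... wrapping modulo 32,
   skipping anything fixed, global, call-saved, or already claimed in
   gr_used_mask.  */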
2249 /* Helper function for ia64_compute_frame_size, called through
2250 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2253 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2255 unsigned int regno = REGNO (reg);
2258 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2259 for (i = 0; i < n; ++i)
2260 current_frame_info.gr_used_mask |= 1 << (regno + i);
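/* Worked example: a DImode return value in r8 sets only bit 8 of
   gr_used_mask, while a TImode value occupying r8-r9 (hard_regno_nregs of
   2) sets bits 8 and 9.  The register numbers assume the IA-64 convention
   that r8 is the first return register.  */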
2264 /* Returns the number of bytes offset between the frame pointer and the stack
2265 pointer for the current function. SIZE is the number of bytes of space
2266 needed for local variables. */
2269 ia64_compute_frame_size (HOST_WIDE_INT size)
2271 HOST_WIDE_INT total_size;
2272 HOST_WIDE_INT spill_size = 0;
2273 HOST_WIDE_INT extra_spill_size = 0;
2274 HOST_WIDE_INT pretend_args_size;
2277 int spilled_gr_p = 0;
2278 int spilled_fr_p = 0;
2282 if (current_frame_info.initialized)
2285 memset (&current_frame_info, 0, sizeof current_frame_info);
2286 CLEAR_HARD_REG_SET (mask);
2288 /* Don't allocate scratches to the return register. */
2289 diddle_return_value (mark_reg_gr_used_mask, NULL);
2291 /* Don't allocate scratches to the EH scratch registers. */
2292 if (cfun->machine->ia64_eh_epilogue_sp)
2293 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2294 if (cfun->machine->ia64_eh_epilogue_bsp)
2295 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2297 /* Find the size of the register stack frame. We have only 80 local
2298 registers, because we reserve 8 for the inputs and 8 for the
2301 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2302 since we'll be adjusting that down later. */
2303 regno = LOC_REG (78) + ! frame_pointer_needed;
2304 for (; regno >= LOC_REG (0); regno--)
2305 if (regs_ever_live[regno])
2307 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2309 /* For functions marked with the syscall_linkage attribute, we must mark
2310 all eight input registers as in use, so that locals aren't visible to
2313 if (cfun->machine->n_varargs > 0
2314 || lookup_attribute ("syscall_linkage",
2315 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2316 current_frame_info.n_input_regs = 8;
2319 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2320 if (regs_ever_live[regno])
2322 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2325 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2326 if (regs_ever_live[regno])
2328 i = regno - OUT_REG (0) + 1;
2330 #ifndef PROFILE_HOOK
2331 /* When -p profiling, we need one output register for the mcount argument.
2332 Likewise for -a profiling for the bb_init_func argument. For -ax
2333 profiling, we need two output registers for the two bb_init_trace_func
2335 if (current_function_profile)
2338 current_frame_info.n_output_regs = i;
2340 /* ??? No rotating register support yet. */
2341 current_frame_info.n_rotate_regs = 0;
2343 /* Discover which registers need spilling, and how much room that
2344 will take. Begin with floating point and general registers,
2345 which will always wind up on the stack. */
2347 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2348 if (regs_ever_live[regno] && ! call_used_regs[regno])
2350 SET_HARD_REG_BIT (mask, regno);
2356 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2357 if (regs_ever_live[regno] && ! call_used_regs[regno])
2359 SET_HARD_REG_BIT (mask, regno);
2365 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2366 if (regs_ever_live[regno] && ! call_used_regs[regno])
2368 SET_HARD_REG_BIT (mask, regno);
2373 /* Now come all special registers that might get saved in other
2374 general registers. */
2376 if (frame_pointer_needed)
2378 current_frame_info.reg_fp = find_gr_spill (1);
2379 /* If we did not get a register, then we take LOC79. This is guaranteed
2380 to be free, even if regs_ever_live is already set, because this is
2381 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2382 as we don't count loc79 above. */
2383 if (current_frame_info.reg_fp == 0)
2385 current_frame_info.reg_fp = LOC_REG (79);
2386 current_frame_info.n_local_regs++;
2390 if (! current_function_is_leaf)
2392 /* Emit a save of BR0 if we call other functions. Do this even
2393 if this function doesn't return, as EH depends on this to be
2394 able to unwind the stack. */
2395 SET_HARD_REG_BIT (mask, BR_REG (0));
2397 current_frame_info.reg_save_b0 = find_gr_spill (1);
2398 if (current_frame_info.reg_save_b0 == 0)
2404 /* Similarly for ar.pfs. */
2405 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2406 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2407 if (current_frame_info.reg_save_ar_pfs == 0)
2409 extra_spill_size += 8;
2413 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2414 registers are clobbered, so we fall back to the stack. */
2415 current_frame_info.reg_save_gp
2416 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2417 if (current_frame_info.reg_save_gp == 0)
2419 SET_HARD_REG_BIT (mask, GR_REG (1));
2426 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2428 SET_HARD_REG_BIT (mask, BR_REG (0));
2433 if (regs_ever_live[AR_PFS_REGNUM])
2435 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2436 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2437 if (current_frame_info.reg_save_ar_pfs == 0)
2439 extra_spill_size += 8;
2445 /* Unwind descriptor hackery: things are most efficient if we allocate
2446 consecutive GR save registers for RP, PFS, FP in that order. However,
2447 it is absolutely critical that FP get the only hard register that's
2448 guaranteed to be free, so we allocated it first. If all three did
2449 happen to be allocated hard regs, and are consecutive, rearrange them
2450 into the preferred order now. */
2451 if (current_frame_info.reg_fp != 0
2452 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2453 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2455 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2456 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2457 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2460 /* See if we need to store the predicate register block. */
2461 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2462 if (regs_ever_live[regno] && ! call_used_regs[regno])
2464 if (regno <= PR_REG (63))
2466 SET_HARD_REG_BIT (mask, PR_REG (0));
2467 current_frame_info.reg_save_pr = find_gr_spill (1);
2468 if (current_frame_info.reg_save_pr == 0)
2470 extra_spill_size += 8;
2474 /* ??? Mark them all as used so that register renaming and such
2475 are free to use them. */
2476 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2477 regs_ever_live[regno] = 1;
2480 /* If we're forced to use st8.spill, we're forced to save and restore
2481 ar.unat as well. The check for existing liveness allows inline asm
2482 to touch ar.unat. */
2483 if (spilled_gr_p || cfun->machine->n_varargs
2484 || regs_ever_live[AR_UNAT_REGNUM])
2486 regs_ever_live[AR_UNAT_REGNUM] = 1;
2487 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2488 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2489 if (current_frame_info.reg_save_ar_unat == 0)
2491 extra_spill_size += 8;
2496 if (regs_ever_live[AR_LC_REGNUM])
2498 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2499 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2500 if (current_frame_info.reg_save_ar_lc == 0)
2502 extra_spill_size += 8;
2507 /* If we have an odd number of words of pretend arguments written to
2508 the stack, then the FR save area will be unaligned. We round the
2509 size of this area up to keep things 16-byte aligned. */
2511 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2513 pretend_args_size = current_function_pretend_args_size;
2515 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2516 + current_function_outgoing_args_size);
2517 total_size = IA64_STACK_ALIGN (total_size);
2519 /* We always use the 16-byte scratch area provided by the caller, but
2520 if we are a leaf function, there's no one to which we need to provide
2522 if (current_function_is_leaf)
2523 total_size = MAX (0, total_size - 16);
2525 current_frame_info.total_size = total_size;
2526 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2527 current_frame_info.spill_size = spill_size;
2528 current_frame_info.extra_spill_size = extra_spill_size;
2529 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2530 current_frame_info.n_spilled = n_spilled;
2531 current_frame_info.initialized = reload_completed;
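/* Worked example (all numbers made up): a non-leaf function with 40 bytes
   of locals (SIZE), two 8-byte GR spills (spill_size == 16), no extra
   spills, no pretend args and 32 bytes of outgoing arguments gets

	total_size = IA64_STACK_ALIGN (16 + 0 + 40 + 0 + 32) = 96

   A leaf function with the same numbers drops the caller's 16-byte scratch
   area and ends up with 80.  */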
2534 /* Compute the initial difference between the specified pair of registers. */
2537 ia64_initial_elimination_offset (int from, int to)
2539 HOST_WIDE_INT offset;
2541 ia64_compute_frame_size (get_frame_size ());
2544 case FRAME_POINTER_REGNUM:
2547 case HARD_FRAME_POINTER_REGNUM:
2548 if (current_function_is_leaf)
2549 offset = -current_frame_info.total_size;
2551 offset = -(current_frame_info.total_size
2552 - current_function_outgoing_args_size - 16);
2555 case STACK_POINTER_REGNUM:
2556 if (current_function_is_leaf)
2559 offset = 16 + current_function_outgoing_args_size;
2567 case ARG_POINTER_REGNUM:
2568 /* Arguments start above the 16-byte save area, unless stdarg,
2569 in which case we store through the 16-byte save area. */
2572 case HARD_FRAME_POINTER_REGNUM:
2573 offset = 16 - current_function_pretend_args_size;
2576 case STACK_POINTER_REGNUM:
2577 offset = (current_frame_info.total_size
2578 + 16 - current_function_pretend_args_size);
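/* Worked example, continuing the made-up frame above (non-leaf,
   total_size 96, 32 bytes of outgoing args, no pretend args):

	FRAME_POINTER -> HARD_FRAME_POINTER : -(96 - 32 - 16) = -48
	FRAME_POINTER -> STACK_POINTER      :  16 + 32        =  48
	ARG_POINTER   -> HARD_FRAME_POINTER :  16 - 0         =  16
	ARG_POINTER   -> STACK_POINTER      :  96 + 16 - 0    = 112  */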
2593 /* If there are more than a trivial number of register spills, we use
2594 two interleaved iterators so that we can get two memory references
2597 In order to simplify things in the prologue and epilogue expanders,
2598 we use helper functions to fix up the memory references after the
2599 fact with the appropriate offsets to a POST_MODIFY memory mode.
2600 The following data structure tracks the state of the two iterators
2601 while insns are being emitted. */
2603 struct spill_fill_data
2605 rtx init_after; /* point at which to emit initializations */
2606 rtx init_reg[2]; /* initial base register */
2607 rtx iter_reg[2]; /* the iterator registers */
2608 rtx *prev_addr[2]; /* address of last memory use */
2609 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2610 HOST_WIDE_INT prev_off[2]; /* last offset */
2611 int n_iter; /* number of iterators in use */
2612 int next_iter; /* next iterator to use */
2613 unsigned int save_gr_used_mask;
2616 static struct spill_fill_data spill_fill_data;
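/* Rough picture of what the second iterator buys us: with one pointer the
   spills all serialize on that register, while with two the stores can
   issue in pairs, e.g.

	st8.spill	[r2] = r4, 16		// iterator 0, post-increment
	st8.spill	[r3] = r5, 16		// iterator 1, post-increment

   The post-increments are the POST_MODIFYs patched in after the fact by
   spill_restore_mem; r2/r3 merely stand in for whatever
   next_scratch_gr_reg hands out.  */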
2619 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2623 spill_fill_data.init_after = get_last_insn ();
2624 spill_fill_data.init_reg[0] = init_reg;
2625 spill_fill_data.init_reg[1] = init_reg;
2626 spill_fill_data.prev_addr[0] = NULL;
2627 spill_fill_data.prev_addr[1] = NULL;
2628 spill_fill_data.prev_insn[0] = NULL;
2629 spill_fill_data.prev_insn[1] = NULL;
2630 spill_fill_data.prev_off[0] = cfa_off;
2631 spill_fill_data.prev_off[1] = cfa_off;
2632 spill_fill_data.next_iter = 0;
2633 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2635 spill_fill_data.n_iter = 1 + (n_spills > 2);
2636 for (i = 0; i < spill_fill_data.n_iter; ++i)
2638 int regno = next_scratch_gr_reg ();
2639 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2640 current_frame_info.gr_used_mask |= 1 << regno;
2645 finish_spill_pointers (void)
2647 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2651 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2653 int iter = spill_fill_data.next_iter;
2654 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2655 rtx disp_rtx = GEN_INT (disp);
2658 if (spill_fill_data.prev_addr[iter])
2660 if (CONST_OK_FOR_N (disp))
2662 *spill_fill_data.prev_addr[iter]
2663 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2664 gen_rtx_PLUS (DImode,
2665 spill_fill_data.iter_reg[iter],
2667 REG_NOTES (spill_fill_data.prev_insn[iter])
2668 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2669 REG_NOTES (spill_fill_data.prev_insn[iter]));
2673 /* ??? Could use register post_modify for loads. */
2674 if (! CONST_OK_FOR_I (disp))
2676 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2677 emit_move_insn (tmp, disp_rtx);
2680 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2681 spill_fill_data.iter_reg[iter], disp_rtx));
2684 /* Micro-optimization: if we've created a frame pointer, it's at
2685 CFA 0, which may allow the real iterator to be initialized lower,
2686 slightly increasing parallelism. Also, if there are few saves
2687 it may eliminate the iterator entirely. */
2689 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2690 && frame_pointer_needed)
2692 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2693 set_mem_alias_set (mem, get_varargs_alias_set ());
2701 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2702 spill_fill_data.init_reg[iter]);
2707 if (! CONST_OK_FOR_I (disp))
2709 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2710 emit_move_insn (tmp, disp_rtx);
2714 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2715 spill_fill_data.init_reg[iter],
2722 /* Careful for being the first insn in a sequence. */
2723 if (spill_fill_data.init_after)
2724 insn = emit_insn_after (seq, spill_fill_data.init_after);
2727 rtx first = get_insns ();
2729 insn = emit_insn_before (seq, first);
2731 insn = emit_insn (seq);
2733 spill_fill_data.init_after = insn;
2735 /* If DISP is 0, we may or may not have a further adjustment
2736 afterward. If we do, then the load/store insn may be modified
2737 to be a post-modify. If we don't, then this copy may be
2738 eliminated by copyprop_hardreg_forward, which makes this
2739 insn garbage and runs afoul of the sanity check in
2740 propagate_one_insn. So mark this insn as legal to delete. */
2742 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2746 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2748 /* ??? Not all of the spills are for varargs, but some of them are.
2749 The rest of the spills belong in an alias set of their own. But
2750 it doesn't actually hurt to include them here. */
2751 set_mem_alias_set (mem, get_varargs_alias_set ());
2753 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2754 spill_fill_data.prev_off[iter] = cfa_off;
2756 if (++iter >= spill_fill_data.n_iter)
2758 spill_fill_data.next_iter = iter;
2764 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2767 int iter = spill_fill_data.next_iter;
2770 mem = spill_restore_mem (reg, cfa_off);
2771 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2772 spill_fill_data.prev_insn[iter] = insn;
2779 RTX_FRAME_RELATED_P (insn) = 1;
2781 /* Don't even pretend that the unwind code can intuit its way
2782 through a pair of interleaved post_modify iterators. Just
2783 provide the correct answer. */
2785 if (frame_pointer_needed)
2787 base = hard_frame_pointer_rtx;
2792 base = stack_pointer_rtx;
2793 off = current_frame_info.total_size - cfa_off;
2797 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2798 gen_rtx_SET (VOIDmode,
2799 gen_rtx_MEM (GET_MODE (reg),
2800 plus_constant (base, off)),
2807 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2809 int iter = spill_fill_data.next_iter;
2812 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2813 GEN_INT (cfa_off)));
2814 spill_fill_data.prev_insn[iter] = insn;
2817 /* Wrapper functions that discard the CONST_INT spill offset. These
2818 exist so that we can give gr_spill/gr_fill the offset they need and
2819 use a consistent function interface. */
2822 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2824 return gen_movdi (dest, src);
2828 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2830 return gen_fr_spill (dest, src);
2834 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2836 return gen_fr_restore (dest, src);
2839 /* Called after register allocation to add any instructions needed for the
2840 prologue. Using a prologue insn is favored compared to putting all of the
2841 instructions in output_function_prologue(), since it allows the scheduler
2842 to intermix instructions with the saves of the caller saved registers. In
2843 some cases, it might be necessary to emit a barrier instruction as the last
2844 insn to prevent such scheduling.
2846 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2847 so that the debug info generation code can handle them properly.
2849 The register save area is laid out like so:
2851 [ varargs spill area ]
2852 [ fr register spill area ]
2853 [ br register spill area ]
2854 [ ar register spill area ]
2855 [ pr register spill area ]
2856 [ gr register spill area ] */
2858 /* ??? Get inefficient code when the frame size is larger than can fit in an
2859 adds instruction. */
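/* A concrete (made-up) instance of the layout above: with no varargs, a
   16-byte pretend-args area, one AR register spilled to memory
   (extra_spill_size == 8) and two GR spills (spill_size == 16), the
   prologue walks cfa_off from

	spill_cfa_off + spill_size + extra_spill_size = 0 + 16 + 8 = 24

   down to spill_cfa_off (here 0), handling the PR/AR saves first and the
   GR/BR/FR saves last, 8 bytes at a time (16 for each FR).  */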
2862 ia64_expand_prologue (void)
2864 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2865 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2868 ia64_compute_frame_size (get_frame_size ());
2869 last_scratch_gr_reg = 15;
2871 /* If there is no epilogue, then we don't need some prologue insns.
2872 We need to avoid emitting the dead prologue insns, because flow
2873 will complain about them. */
2879 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2880 if ((e->flags & EDGE_FAKE) == 0
2881 && (e->flags & EDGE_FALLTHRU) != 0)
2883 epilogue_p = (e != NULL);
2888 /* Set the local, input, and output register names. We need to do this
2889 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2890 half. If we use in/loc/out register names, then we get assembler errors
2891 in crtn.S because there is no alloc insn or regstk directive in there. */
2892 if (! TARGET_REG_NAMES)
2894 int inputs = current_frame_info.n_input_regs;
2895 int locals = current_frame_info.n_local_regs;
2896 int outputs = current_frame_info.n_output_regs;
2898 for (i = 0; i < inputs; i++)
2899 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2900 for (i = 0; i < locals; i++)
2901 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2902 for (i = 0; i < outputs; i++)
2903 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2906 /* Set the frame pointer register name. The regnum is logically loc79,
2907 but of course we'll not have allocated that many locals. Rather than
2908 worrying about renumbering the existing rtxs, we adjust the name. */
2909 /* ??? This code means that we can never use one local register when
2910 there is a frame pointer. loc79 gets wasted in this case, as it is
2911 renamed to a register that will never be used. See also the try_locals
2912 code in find_gr_spill. */
2913 if (current_frame_info.reg_fp)
2915 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2916 reg_names[HARD_FRAME_POINTER_REGNUM]
2917 = reg_names[current_frame_info.reg_fp];
2918 reg_names[current_frame_info.reg_fp] = tmp;
2921 /* We don't need an alloc instruction if we've used no outputs or locals. */
2922 if (current_frame_info.n_local_regs == 0
2923 && current_frame_info.n_output_regs == 0
2924 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2925 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2927 /* If there is no alloc, but there are input registers used, then we
2928 need a .regstk directive. */
2929 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2930 ar_pfs_save_reg = NULL_RTX;
2934 current_frame_info.need_regstk = 0;
2936 if (current_frame_info.reg_save_ar_pfs)
2937 regno = current_frame_info.reg_save_ar_pfs;
2939 regno = next_scratch_gr_reg ();
2940 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2942 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2943 GEN_INT (current_frame_info.n_input_regs),
2944 GEN_INT (current_frame_info.n_local_regs),
2945 GEN_INT (current_frame_info.n_output_regs),
2946 GEN_INT (current_frame_info.n_rotate_regs)));
2947 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2950 /* Set up frame pointer, stack pointer, and spill iterators. */
2952 n_varargs = cfun->machine->n_varargs;
2953 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2954 stack_pointer_rtx, 0);
2956 if (frame_pointer_needed)
2958 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2959 RTX_FRAME_RELATED_P (insn) = 1;
2962 if (current_frame_info.total_size != 0)
2964 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2967 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2968 offset = frame_size_rtx;
2971 regno = next_scratch_gr_reg ();
2972 offset = gen_rtx_REG (DImode, regno);
2973 emit_move_insn (offset, frame_size_rtx);
2976 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2977 stack_pointer_rtx, offset));
2979 if (! frame_pointer_needed)
2981 RTX_FRAME_RELATED_P (insn) = 1;
2982 if (GET_CODE (offset) != CONST_INT)
2985 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2986 gen_rtx_SET (VOIDmode,
2988 gen_rtx_PLUS (DImode,
2995 /* ??? At this point we must generate a magic insn that appears to
2996 modify the stack pointer, the frame pointer, and all spill
2997 iterators. This would allow the most scheduling freedom. For
2998 now, just hard stop. */
2999 emit_insn (gen_blockage ());
3002 /* Must copy out ar.unat before doing any integer spills. */
3003 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3005 if (current_frame_info.reg_save_ar_unat)
3007 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3010 alt_regno = next_scratch_gr_reg ();
3011 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3012 current_frame_info.gr_used_mask |= 1 << alt_regno;
3015 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3016 insn = emit_move_insn (ar_unat_save_reg, reg);
3017 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3019 /* Even if we're not going to generate an epilogue, we still
3020 need to save the register so that EH works. */
3021 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3022 emit_insn (gen_prologue_use (ar_unat_save_reg));
3025 ar_unat_save_reg = NULL_RTX;
3027 /* Spill all varargs registers. Do this before spilling any GR registers,
3028 since we want the UNAT bits for the GR registers to override the UNAT
3029 bits from varargs, which we don't care about. */
3032 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3034 reg = gen_rtx_REG (DImode, regno);
3035 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3038 /* Locate the bottom of the register save area. */
3039 cfa_off = (current_frame_info.spill_cfa_off
3040 + current_frame_info.spill_size
3041 + current_frame_info.extra_spill_size);
3043 /* Save the predicate register block either in a register or in memory. */
3044 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3046 reg = gen_rtx_REG (DImode, PR_REG (0));
3047 if (current_frame_info.reg_save_pr != 0)
3049 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3050 insn = emit_move_insn (alt_reg, reg);
3052 /* ??? Denote pr spill/fill by a DImode move that modifies all
3053 64 hard registers. */
3054 RTX_FRAME_RELATED_P (insn) = 1;
3056 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3057 gen_rtx_SET (VOIDmode, alt_reg, reg),
3060 /* Even if we're not going to generate an epilogue, we still
3061 need to save the register so that EH works. */
3063 emit_insn (gen_prologue_use (alt_reg));
3067 alt_regno = next_scratch_gr_reg ();
3068 alt_reg = gen_rtx_REG (DImode, alt_regno);
3069 insn = emit_move_insn (alt_reg, reg);
3070 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3075 /* Handle AR regs in numerical order. All of them get special handling. */
3076 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3077 && current_frame_info.reg_save_ar_unat == 0)
3079 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3080 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3084 /* The alloc insn already copied ar.pfs into a general register. The
3085 only thing we have to do now is copy that register to a stack slot
3086 if we'd not allocated a local register for the job. */
3087 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3088 && current_frame_info.reg_save_ar_pfs == 0)
3090 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3091 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3095 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3097 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3098 if (current_frame_info.reg_save_ar_lc != 0)
3100 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3101 insn = emit_move_insn (alt_reg, reg);
3102 RTX_FRAME_RELATED_P (insn) = 1;
3104 /* Even if we're not going to generate an epilogue, we still
3105 need to save the register so that EH works. */
3107 emit_insn (gen_prologue_use (alt_reg));
3111 alt_regno = next_scratch_gr_reg ();
3112 alt_reg = gen_rtx_REG (DImode, alt_regno);
3113 emit_move_insn (alt_reg, reg);
3114 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3119 if (current_frame_info.reg_save_gp)
3121 insn = emit_move_insn (gen_rtx_REG (DImode,
3122 current_frame_info.reg_save_gp),
3123 pic_offset_table_rtx);
3124 /* We don't know for sure yet if this is actually needed, since
3125 we've not split the PIC call patterns. If all of the calls
3126 are indirect, and not followed by any uses of the gp, then
3127 this save is dead. Allow it to go away. */
3129 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3132 /* We should now be at the base of the gr/br/fr spill area. */
3133 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3134 + current_frame_info.spill_size));
3136 /* Spill all general registers. */
3137 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3138 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3140 reg = gen_rtx_REG (DImode, regno);
3141 do_spill (gen_gr_spill, reg, cfa_off, reg);
3145 /* Handle BR0 specially -- it may be getting stored permanently in
3146 some GR register. */
3147 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3149 reg = gen_rtx_REG (DImode, BR_REG (0));
3150 if (current_frame_info.reg_save_b0 != 0)
3152 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3153 insn = emit_move_insn (alt_reg, reg);
3154 RTX_FRAME_RELATED_P (insn) = 1;
3156 /* Even if we're not going to generate an epilogue, we still
3157 need to save the register so that EH works. */
3159 emit_insn (gen_prologue_use (alt_reg));
3163 alt_regno = next_scratch_gr_reg ();
3164 alt_reg = gen_rtx_REG (DImode, alt_regno);
3165 emit_move_insn (alt_reg, reg);
3166 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3171 /* Spill the rest of the BR registers. */
3172 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3173 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3175 alt_regno = next_scratch_gr_reg ();
3176 alt_reg = gen_rtx_REG (DImode, alt_regno);
3177 reg = gen_rtx_REG (DImode, regno);
3178 emit_move_insn (alt_reg, reg);
3179 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3183 /* Align the frame and spill all FR registers. */
3184 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3185 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3187 gcc_assert (!(cfa_off & 15));
3188 reg = gen_rtx_REG (XFmode, regno);
3189 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3193 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3195 finish_spill_pointers ();
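/* Illustrative only: for a small non-leaf frame where b0, ar.pfs and gp
   each get a local save register, the RTL emitted above typically
   assembles to something like

	alloc		r35 = ar.pfs, 2, 3, 0, 0
	mov		r34 = b0
	mov		r36 = r1
	adds		r12 = -96, r12

   The alloc operands come from n_input/local/output/rotate_regs, -96 is
   current_frame_info.total_size, and the register numbers and ordering
   are placeholders -- the scheduler is free to interleave these with
   other work.  */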
3198 /* Called after register allocation to add any instructions needed for the
3199 epilogue. Using an epilogue insn is favored compared to putting all of the
3200 instructions in output_function_epilogue(), since it allows the scheduler
3201 to intermix instructions with the saves of the caller saved registers. In
3202 some cases, it might be necessary to emit a barrier instruction as the last
3203 insn to prevent such scheduling. */
3206 ia64_expand_epilogue (int sibcall_p)
3208 rtx insn, reg, alt_reg, ar_unat_save_reg;
3209 int regno, alt_regno, cfa_off;
3211 ia64_compute_frame_size (get_frame_size ());
3213 /* If there is a frame pointer, then we use it instead of the stack
3214 pointer, so that the stack pointer does not need to be valid when
3215 the epilogue starts. See EXIT_IGNORE_STACK. */
3216 if (frame_pointer_needed)
3217 setup_spill_pointers (current_frame_info.n_spilled,
3218 hard_frame_pointer_rtx, 0);
3220 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3221 current_frame_info.total_size);
3223 if (current_frame_info.total_size != 0)
3225 /* ??? At this point we must generate a magic insn that appears to
3226 modify the spill iterators and the frame pointer. This would
3227 allow the most scheduling freedom. For now, just hard stop. */
3228 emit_insn (gen_blockage ());
3231 /* Locate the bottom of the register save area. */
3232 cfa_off = (current_frame_info.spill_cfa_off
3233 + current_frame_info.spill_size
3234 + current_frame_info.extra_spill_size);
3236 /* Restore the predicate registers. */
3237 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3239 if (current_frame_info.reg_save_pr != 0)
3240 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3243 alt_regno = next_scratch_gr_reg ();
3244 alt_reg = gen_rtx_REG (DImode, alt_regno);
3245 do_restore (gen_movdi_x, alt_reg, cfa_off);
3248 reg = gen_rtx_REG (DImode, PR_REG (0));
3249 emit_move_insn (reg, alt_reg);
3252 /* Restore the application registers. */
3254 /* Load the saved unat from the stack, but do not restore it until
3255 after the GRs have been restored. */
3256 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3258 if (current_frame_info.reg_save_ar_unat != 0)
3260 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3263 alt_regno = next_scratch_gr_reg ();
3264 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3265 current_frame_info.gr_used_mask |= 1 << alt_regno;
3266 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3271 ar_unat_save_reg = NULL_RTX;
3273 if (current_frame_info.reg_save_ar_pfs != 0)
3275 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3276 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3277 emit_move_insn (reg, alt_reg);
3279 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3281 alt_regno = next_scratch_gr_reg ();
3282 alt_reg = gen_rtx_REG (DImode, alt_regno);
3283 do_restore (gen_movdi_x, alt_reg, cfa_off);
3285 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3286 emit_move_insn (reg, alt_reg);
3289 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3291 if (current_frame_info.reg_save_ar_lc != 0)
3292 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3295 alt_regno = next_scratch_gr_reg ();
3296 alt_reg = gen_rtx_REG (DImode, alt_regno);
3297 do_restore (gen_movdi_x, alt_reg, cfa_off);
3300 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3301 emit_move_insn (reg, alt_reg);
3304 /* We should now be at the base of the gr/br/fr spill area. */
3305 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3306 + current_frame_info.spill_size));
3308 /* The GP may be stored on the stack in the prologue, but it's
3309 never restored in the epilogue. Skip the stack slot. */
3310 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3313 /* Restore all general registers. */
3314 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3315 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3317 reg = gen_rtx_REG (DImode, regno);
3318 do_restore (gen_gr_restore, reg, cfa_off);
3322 /* Restore the branch registers. Handle B0 specially, as it may
3323 have gotten stored in some GR register. */
3324 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3326 if (current_frame_info.reg_save_b0 != 0)
3327 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3330 alt_regno = next_scratch_gr_reg ();
3331 alt_reg = gen_rtx_REG (DImode, alt_regno);
3332 do_restore (gen_movdi_x, alt_reg, cfa_off);
3335 reg = gen_rtx_REG (DImode, BR_REG (0));
3336 emit_move_insn (reg, alt_reg);
3339 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3340 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3342 alt_regno = next_scratch_gr_reg ();
3343 alt_reg = gen_rtx_REG (DImode, alt_regno);
3344 do_restore (gen_movdi_x, alt_reg, cfa_off);
3346 reg = gen_rtx_REG (DImode, regno);
3347 emit_move_insn (reg, alt_reg);
3350 /* Restore floating point registers. */
3351 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3352 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3354 gcc_assert (!(cfa_off & 15));
3355 reg = gen_rtx_REG (XFmode, regno);
3356 do_restore (gen_fr_restore_x, reg, cfa_off);
3360 /* Restore ar.unat for real. */
3361 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3363 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3364 emit_move_insn (reg, ar_unat_save_reg);
3367 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3369 finish_spill_pointers ();
3371 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3373 /* ??? At this point we must generate a magic insn that appears to
3374 modify the spill iterators, the stack pointer, and the frame
3375 pointer. This would allow the most scheduling freedom. For now,
3377 emit_insn (gen_blockage ());
3380 if (cfun->machine->ia64_eh_epilogue_sp)
3381 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3382 else if (frame_pointer_needed)
3384 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3385 RTX_FRAME_RELATED_P (insn) = 1;
3387 else if (current_frame_info.total_size)
3389 rtx offset, frame_size_rtx;
3391 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3392 if (CONST_OK_FOR_I (current_frame_info.total_size))
3393 offset = frame_size_rtx;
3396 regno = next_scratch_gr_reg ();
3397 offset = gen_rtx_REG (DImode, regno);
3398 emit_move_insn (offset, frame_size_rtx);
3401 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3404 RTX_FRAME_RELATED_P (insn) = 1;
3405 if (GET_CODE (offset) != CONST_INT)
3408 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3409 gen_rtx_SET (VOIDmode,
3411 gen_rtx_PLUS (DImode,
3418 if (cfun->machine->ia64_eh_epilogue_bsp)
3419 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3422 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3425 int fp = GR_REG (2);
3426 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3427 first available call-clobbered register. If there was a frame pointer
3428 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3429 so we have to make sure we're using the string "r2" when emitting
3430 the register name for the assembler. */
3431 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3432 fp = HARD_FRAME_POINTER_REGNUM;
3434 /* We must emit an alloc to force the input registers to become output
3435 registers. Otherwise, if the callee tries to pass its parameters
3436 through to another call without an intervening alloc, then these
3438 /* ??? We don't need to preserve all input registers. We only need to
3439 preserve those input registers used as arguments to the sibling call.
3440 It is unclear how to compute that number here. */
3441 if (current_frame_info.n_input_regs != 0)
3443 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3444 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3445 const0_rtx, const0_rtx,
3446 n_inputs, const0_rtx));
3447 RTX_FRAME_RELATED_P (insn) = 1;
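/* Illustrative counterpart for the ordinary (non-sibcall) return path:
   the restores above typically come out as

	mov		b0 = r34
	mov		ar.pfs = r35
	adds		r12 = 96, r12
	br.ret.sptk.many b0

   with placeholder register numbers; gp is deliberately not restored, as
   noted above.  */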
3452 /* Return 1 if br.ret can do all the work required to return from a
3456 ia64_direct_return (void)
3458 if (reload_completed && ! frame_pointer_needed)
3460 ia64_compute_frame_size (get_frame_size ());
3462 return (current_frame_info.total_size == 0
3463 && current_frame_info.n_spilled == 0
3464 && current_frame_info.reg_save_b0 == 0
3465 && current_frame_info.reg_save_pr == 0
3466 && current_frame_info.reg_save_ar_pfs == 0
3467 && current_frame_info.reg_save_ar_unat == 0
3468 && current_frame_info.reg_save_ar_lc == 0);