/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "recog.h"
#include "basic-block.h"
#include "sched-int.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
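/* (Of those, "adds" encodes a 14-bit immediate, "addl" a 22-bit one, and
   "movl" a full 64-bit one, so the chosen size picks the cheapest
   instruction that can still reach the thread-local offset.)  */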
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_use_dfa_pipeline_interface (void);
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
					 int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
				     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (const char *name)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",	       1, 1, true,  false, false, ia64_handle_model_attribute },
  { NULL,	       0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
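/* (The IA-64 assembler spells its data directives data1/data2/data4/data8,
   with ".ua" variants for unaligned data, instead of the more common
   .byte/.short/.int/.quad, hence the overrides below.)  */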
#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (rtx op, enum machine_mode mode)
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */
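/* (sdata symbols sit within the signed 22-bit gp-relative range of "addl",
   so they can be addressed directly off gp without a GOT load.)  */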
int
sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}
/* Return 1 if OP refers to a symbol in the small address area.  */

int
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return SYMBOL_REF_SMALL_ADDR_P (op);
}
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
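      /* (E.g. sym+0x4008 is rejected here and is rebuilt elsewhere as
	 (sym+0x4000) + 8, so that nearby offsets share the single GOT
	 entry for sym+0x4000 and the residual 8 fits a 14-bit "adds".)  */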
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
      if (SYMBOL_REF_SMALL_ADDR_P (op))
	return 0;
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}
/* Return 1 if OP refers to a function.  */

int
function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (rtx op, enum machine_mode mode)
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}
/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so the immediate must be acceptable to both GT and LT (the intersection
   of the two constraints) to be safe either way.  */

int
gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
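/* (These are the multipliers implementable with a single "shladd", whose
   shift count is limited to 1..4; e.g. x*4 + y is "shladd r = x, 2, y".)  */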
int
shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */
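/* (In this vintage of GCC, GET_RTX_CLASS 'a' designates the autoincrement
   address codes: PRE_INC, PRE_DEC, POST_INC, POST_DEC and the MODIFY
   forms.)  */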
int
not_postinc_memory_operand (rtx op, enum machine_mode mode)
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}
/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (register rtx op, enum machine_mode mode)
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like destination_operand, but don't allow (mem (addressof)).  */

int
destination_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow subregs.  */

int
xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (rtx op, enum machine_mode mode)
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without
     them.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl, decl);
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("%Jaddress area of '%s' conflicts with previous "
		 "declaration", decl, decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
	     decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, SRC must be a constant: 0 for integral modes, or 0.0
     or 1.0 for floating-point modes.  */
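  /* (IA-64 hardwires fr0 to +0.0 and fr1 to +1.0, which is why those two
     floating-point constants are as cheap as registers here.)  */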
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE (op1)) !=
	  basereg_operand (op2, GET_MODE (op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
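  /* For example, rop = 0x3f8 with rshift = 3: 0x3f8 >> 3 = 0x7f, and
     exact_log2 (0x7f + 1) = 7, i.e. a 7-bit field.  */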
  return exact_log2 (op + 1);
}
/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
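      /* E.g. ofs = 0x2345 splits into lo = 0x345 - 0x2000 = -0x1cbb and
	 hi = 0x4000, so that hi + lo reconstructs the original offset.  */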
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
	}
      else
	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
	return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
	{
	  ia64_expand_load_address (op0, op1);
	  return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    if (reversed || dead) abort ();
	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    if (reversed || dead) abort ();
	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    if (reversed || dead) abort ();
	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
	      abort ();
	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
	      {
		/* Again the postmodify cannot be made to match, but
		   in this case it's more efficient to get rid of the
		   postmodify entirely and fix up with an add insn.  */
		out[1] = adjust_automodify_address (in, DImode, base, 8);
		fixup = gen_adddi3 (base, base,
				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
	      }
	    else
	      {
		/* Combined offset still fits in the displacement field.
		   (We cannot overflow it at the high end.)  */
		out[1] = adjust_automodify_address
		  (in, DImode,
		   gen_rtx_POST_MODIFY (Pmode, base,
					gen_rtx_PLUS (Pmode, base,
						      GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		   8);
	      }
	    break;

	  default:
	    abort ();
	  }
	break;
      }

    default:
      abort ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\
					  REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, XFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, which indicates what to do.
     The return value is an integer to be compared against zero.  */
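  /* For instance, (a < b) becomes _U_Qfcmp (a, b, QCMP_LT|QCMP_INV) != 0,
     and (a == b) becomes _U_Qfcmp (a, b, QCMP_EQ) != 0, per the mapping
     below.  */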
  else if (TARGET_HPUX && GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;
      if (GET_MODE (op1) != TFmode)
	abort ();
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: abort ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}
void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}
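/* Emit the ".pred.safe_across_calls" directive listing the ranges of
   predicate registers that are preserved across calls; under the standard
   IA-64 conventions this typically prints
   ".pred.safe_across_calls p1-p5,p16-p63".  */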
void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */
2060 if (cfun->machine->n_varargs > 0
2061 || lookup_attribute ("syscall_linkage",
2062 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2063 current_frame_info.n_input_regs = 8;
2066 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2067 if (regs_ever_live[regno])
2069 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2072 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2073 if (regs_ever_live[regno])
2075 i = regno - OUT_REG (0) + 1;
2077 /* When -p profiling, we need one output register for the mcount argument.
2078 Likewise for -a profiling for the bb_init_func argument. For -ax
2079 profiling, we need two output registers for the two bb_init_trace_func
2081 if (current_function_profile)
2082 i = MAX (i, 1);
2083 current_frame_info.n_output_regs = i;
2085 /* ??? No rotating register support yet. */
2086 current_frame_info.n_rotate_regs = 0;
2088 /* Discover which registers need spilling, and how much room that
2089 will take. Begin with floating point and general registers,
2090 which will always wind up on the stack. */
2092 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2093 if (regs_ever_live[regno] && ! call_used_regs[regno])
2095 SET_HARD_REG_BIT (mask, regno);
2101 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2102 if (regs_ever_live[regno] && ! call_used_regs[regno])
2104 SET_HARD_REG_BIT (mask, regno);
2110 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2111 if (regs_ever_live[regno] && ! call_used_regs[regno])
2113 SET_HARD_REG_BIT (mask, regno);
2118 /* Now come all special registers that might get saved in other
2119 general registers. */
2121 if (frame_pointer_needed)
2123 current_frame_info.reg_fp = find_gr_spill (1);
2124 /* If we did not get a register, then we take LOC79. This is guaranteed
2125 to be free, even if regs_ever_live is already set, because this is
2126 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2127 as we don't count loc79 above. */
2128 if (current_frame_info.reg_fp == 0)
2130 current_frame_info.reg_fp = LOC_REG (79);
2131 current_frame_info.n_local_regs++;
2135 if (! current_function_is_leaf)
2137 /* Emit a save of BR0 if we call other functions. Do this even
2138 if this function doesn't return, as EH depends on this to be
2139 able to unwind the stack. */
2140 SET_HARD_REG_BIT (mask, BR_REG (0));
2142 current_frame_info.reg_save_b0 = find_gr_spill (1);
2143 if (current_frame_info.reg_save_b0 == 0)
2149 /* Similarly for ar.pfs. */
2150 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2151 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2152 if (current_frame_info.reg_save_ar_pfs == 0)
2154 extra_spill_size += 8;
2158 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2159 registers are clobbered, so we fall back to the stack. */
2160 current_frame_info.reg_save_gp
2161 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2162 if (current_frame_info.reg_save_gp == 0)
2164 SET_HARD_REG_BIT (mask, GR_REG (1));
2171 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2173 SET_HARD_REG_BIT (mask, BR_REG (0));
2178 if (regs_ever_live[AR_PFS_REGNUM])
2180 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2181 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2182 if (current_frame_info.reg_save_ar_pfs == 0)
2184 extra_spill_size += 8;
2190 /* Unwind descriptor hackery: things are most efficient if we allocate
2191 consecutive GR save registers for RP, PFS, FP in that order. However,
2192 it is absolutely critical that FP get the only hard register that's
2193 guaranteed to be free, so we allocated it first. If all three did
2194 happen to be allocated hard regs, and are consecutive, rearrange them
2195 into the preferred order now. */
2196 if (current_frame_info.reg_fp != 0
2197 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2198 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2200 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2201 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2202 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2205 /* See if we need to store the predicate register block. */
2206 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2207 if (regs_ever_live[regno] && ! call_used_regs[regno])
2208 break;
2209 if (regno <= PR_REG (63))
2211 SET_HARD_REG_BIT (mask, PR_REG (0));
2212 current_frame_info.reg_save_pr = find_gr_spill (1);
2213 if (current_frame_info.reg_save_pr == 0)
2215 extra_spill_size += 8;
2219 /* ??? Mark them all as used so that register renaming and such
2220 are free to use them. */
2221 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2222 regs_ever_live[regno] = 1;
2225 /* If we're forced to use st8.spill, we're forced to save and restore
2226 ar.unat as well. The check for existing liveness allows inline asm
2227 to touch ar.unat. */
2228 if (spilled_gr_p || cfun->machine->n_varargs
2229 || regs_ever_live[AR_UNAT_REGNUM])
2231 regs_ever_live[AR_UNAT_REGNUM] = 1;
2232 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2233 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2234 if (current_frame_info.reg_save_ar_unat == 0)
2236 extra_spill_size += 8;
2241 if (regs_ever_live[AR_LC_REGNUM])
2243 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2244 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2245 if (current_frame_info.reg_save_ar_lc == 0)
2247 extra_spill_size += 8;
2252 /* If we have an odd number of words of pretend arguments written to
2253 the stack, then the FR save area will be unaligned. We round the
2254 size of this area up to keep things 16 byte aligned. */
2255 if (spilled_fr_p)
2256 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2257 else
2258 pretend_args_size = current_function_pretend_args_size;
2260 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2261 + current_function_outgoing_args_size);
2262 total_size = IA64_STACK_ALIGN (total_size);
2264 /* We always use the 16-byte scratch area provided by the caller, but
2265 if we are a leaf function, there's no one to which we need to provide
2266 a scratch area. */
2267 if (current_function_is_leaf)
2268 total_size = MAX (0, total_size - 16);
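/* Worked example (assuming IA64_STACK_ALIGN rounds up to a multiple of
   16 bytes, as the alignment comments above describe): spill_size 24 +
   extra_spill_size 8 + size 40 with no pretend or outgoing args gives
   72, which aligns up to 80; a leaf function then drops the caller's
   scratch area, leaving MAX (0, 80 - 16) = 64.  */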
2270 current_frame_info.total_size = total_size;
2271 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2272 current_frame_info.spill_size = spill_size;
2273 current_frame_info.extra_spill_size = extra_spill_size;
2274 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2275 current_frame_info.n_spilled = n_spilled;
2276 current_frame_info.initialized = reload_completed;
2279 /* Compute the initial difference between the specified pair of registers. */
2282 ia64_initial_elimination_offset (int from, int to)
2284 HOST_WIDE_INT offset;
2286 ia64_compute_frame_size (get_frame_size ());
2289 case FRAME_POINTER_REGNUM:
2290 if (to == HARD_FRAME_POINTER_REGNUM)
2292 if (current_function_is_leaf)
2293 offset = -current_frame_info.total_size;
2295 offset = -(current_frame_info.total_size
2296 - current_function_outgoing_args_size - 16);
2298 else if (to == STACK_POINTER_REGNUM)
2300 if (current_function_is_leaf)
2303 offset = 16 + current_function_outgoing_args_size;
2309 case ARG_POINTER_REGNUM:
2310 /* Arguments start above the 16 byte save area, unless stdarg
2311 in which case we store through the 16 byte save area. */
2312 if (to == HARD_FRAME_POINTER_REGNUM)
2313 offset = 16 - current_function_pretend_args_size;
2314 else if (to == STACK_POINTER_REGNUM)
2315 offset = (current_frame_info.total_size
2316 + 16 - current_function_pretend_args_size);
2328 /* If there are more than a trivial number of register spills, we use
2329 two interleaved iterators so that we can get two memory references
2330 per insn group.
2332 In order to simplify things in the prologue and epilogue expanders,
2333 we use helper functions to fix up the memory references after the
2334 fact with the appropriate offsets to a POST_MODIFY memory mode.
2335 The following data structure tracks the state of the two iterators
2336 while insns are being emitted. */
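/* A sketch of the mechanism: with two iterators active, successive
   spill_restore_mem calls alternate between iter_reg[0] and
   iter_reg[1]; when an iterator is reused, its previous memory
   reference is rewritten into a POST_MODIFY whose increment steps
   over the slots written through the other iterator.  */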
2338 struct spill_fill_data
2340 rtx init_after; /* point at which to emit initializations */
2341 rtx init_reg[2]; /* initial base register */
2342 rtx iter_reg[2]; /* the iterator registers */
2343 rtx *prev_addr[2]; /* address of last memory use */
2344 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2345 HOST_WIDE_INT prev_off[2]; /* last offset */
2346 int n_iter; /* number of iterators in use */
2347 int next_iter; /* next iterator to use */
2348 unsigned int save_gr_used_mask;
2351 static struct spill_fill_data spill_fill_data;
2354 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2358 spill_fill_data.init_after = get_last_insn ();
2359 spill_fill_data.init_reg[0] = init_reg;
2360 spill_fill_data.init_reg[1] = init_reg;
2361 spill_fill_data.prev_addr[0] = NULL;
2362 spill_fill_data.prev_addr[1] = NULL;
2363 spill_fill_data.prev_insn[0] = NULL;
2364 spill_fill_data.prev_insn[1] = NULL;
2365 spill_fill_data.prev_off[0] = cfa_off;
2366 spill_fill_data.prev_off[1] = cfa_off;
2367 spill_fill_data.next_iter = 0;
2368 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2370 spill_fill_data.n_iter = 1 + (n_spills > 2);
2371 for (i = 0; i < spill_fill_data.n_iter; ++i)
2373 int regno = next_scratch_gr_reg ();
2374 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2375 current_frame_info.gr_used_mask |= 1 << regno;
2380 finish_spill_pointers (void)
2382 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2386 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2388 int iter = spill_fill_data.next_iter;
2389 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2390 rtx disp_rtx = GEN_INT (disp);
2393 if (spill_fill_data.prev_addr[iter])
2395 if (CONST_OK_FOR_N (disp))
2397 *spill_fill_data.prev_addr[iter]
2398 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2399 gen_rtx_PLUS (DImode,
2400 spill_fill_data.iter_reg[iter],
2402 REG_NOTES (spill_fill_data.prev_insn[iter])
2403 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2404 REG_NOTES (spill_fill_data.prev_insn[iter]));
2408 /* ??? Could use register post_modify for loads. */
2409 if (! CONST_OK_FOR_I (disp))
2411 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2412 emit_move_insn (tmp, disp_rtx);
2415 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2416 spill_fill_data.iter_reg[iter], disp_rtx));
2419 /* Micro-optimization: if we've created a frame pointer, it's at
2420 CFA 0, which may allow the real iterator to be initialized lower,
2421 slightly increasing parallelism. Also, if there are few saves
2422 it may eliminate the iterator entirely. */
2423 if (disp == 0
2424 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2425 && frame_pointer_needed)
2427 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2428 set_mem_alias_set (mem, get_varargs_alias_set ());
2436 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2437 spill_fill_data.init_reg[iter]);
2442 if (! CONST_OK_FOR_I (disp))
2444 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2445 emit_move_insn (tmp, disp_rtx);
2449 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2450 spill_fill_data.init_reg[iter],
2457 /* Careful for being the first insn in a sequence. */
2458 if (spill_fill_data.init_after)
2459 insn = emit_insn_after (seq, spill_fill_data.init_after);
2462 rtx first = get_insns ();
2464 insn = emit_insn_before (seq, first);
2466 insn = emit_insn (seq);
2468 spill_fill_data.init_after = insn;
2470 /* If DISP is 0, we may or may not have a further adjustment
2471 afterward. If we do, then the load/store insn may be modified
2472 to be a post-modify. If we don't, then this copy may be
2473 eliminated by copyprop_hardreg_forward, which makes this
2474 insn garbage, which runs afoul of the sanity check in
2475 propagate_one_insn. So mark this insn as legal to delete. */
2477 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2481 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2483 /* ??? Not all of the spills are for varargs, but some of them are.
2484 The rest of the spills belong in an alias set of their own. But
2485 it doesn't actually hurt to include them here. */
2486 set_mem_alias_set (mem, get_varargs_alias_set ());
2488 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2489 spill_fill_data.prev_off[iter] = cfa_off;
2491 if (++iter >= spill_fill_data.n_iter)
2492 iter = 0;
2493 spill_fill_data.next_iter = iter;
2499 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2502 int iter = spill_fill_data.next_iter;
2505 mem = spill_restore_mem (reg, cfa_off);
2506 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2507 spill_fill_data.prev_insn[iter] = insn;
2514 RTX_FRAME_RELATED_P (insn) = 1;
2516 /* Don't even pretend that the unwind code can intuit its way
2517 through a pair of interleaved post_modify iterators. Just
2518 provide the correct answer. */
2520 if (frame_pointer_needed)
2522 base = hard_frame_pointer_rtx;
2527 base = stack_pointer_rtx;
2528 off = current_frame_info.total_size - cfa_off;
2532 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2533 gen_rtx_SET (VOIDmode,
2534 gen_rtx_MEM (GET_MODE (reg),
2535 plus_constant (base, off)),
2542 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2544 int iter = spill_fill_data.next_iter;
2547 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2548 GEN_INT (cfa_off)));
2549 spill_fill_data.prev_insn[iter] = insn;
2552 /* Wrapper functions that discard the CONST_INT spill offset. These
2553 exist so that we can give gr_spill/gr_fill the offset they need and
2554 use a consistent function interface. */
2557 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2559 return gen_movdi (dest, src);
2563 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2565 return gen_fr_spill (dest, src);
2569 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2571 return gen_fr_restore (dest, src);
2574 /* Called after register allocation to add any instructions needed for the
2575 prologue. Using a prologue insn is favored compared to putting all of the
2576 instructions in output_function_prologue(), since it allows the scheduler
2577 to intermix instructions with the saves of the caller saved registers. In
2578 some cases, it might be necessary to emit a barrier instruction as the last
2579 insn to prevent such scheduling.
2581 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2582 so that the debug info generation code can handle them properly.
2584 The register save area is laid out like so:
2586 [ varargs spill area ]
2587 [ fr register spill area ]
2588 [ br register spill area ]
2589 [ ar register spill area ]
2590 [ pr register spill area ]
2591 [ gr register spill area ] */
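/* For example: ia64_expand_prologue below spills the varargs area
   first, then resets cfa_off to the base of the remaining save area
   and works through the pr, ar and gp saves, then the gr, br and fr
   spills, checking at the end that cfa_off has returned exactly to
   spill_cfa_off.  */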
2593 /* ??? Get inefficient code when the frame size is larger than can fit in an
2594 adds instruction. */
2597 ia64_expand_prologue (void)
2599 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2600 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2603 ia64_compute_frame_size (get_frame_size ());
2604 last_scratch_gr_reg = 15;
2606 /* If there is no epilogue, then we don't need some prologue insns.
2607 We need to avoid emitting the dead prologue insns, because flow
2608 will complain about them. */
2613 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2614 if ((e->flags & EDGE_FAKE) == 0
2615 && (e->flags & EDGE_FALLTHRU) != 0)
2616 break;
2617 epilogue_p = (e != NULL);
2622 /* Set the local, input, and output register names. We need to do this
2623 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2624 half. If we use in/loc/out register names, then we get assembler errors
2625 in crtn.S because there is no alloc insn or regstk directive in there. */
2626 if (! TARGET_REG_NAMES)
2628 int inputs = current_frame_info.n_input_regs;
2629 int locals = current_frame_info.n_local_regs;
2630 int outputs = current_frame_info.n_output_regs;
2632 for (i = 0; i < inputs; i++)
2633 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2634 for (i = 0; i < locals; i++)
2635 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2636 for (i = 0; i < outputs; i++)
2637 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2640 /* Set the frame pointer register name. The regnum is logically loc79,
2641 but of course we'll not have allocated that many locals. Rather than
2642 worrying about renumbering the existing rtxs, we adjust the name. */
2643 /* ??? This code means that we can never use one local register when
2644 there is a frame pointer. loc79 gets wasted in this case, as it is
2645 renamed to a register that will never be used. See also the try_locals
2646 code in find_gr_spill. */
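/* For example: if reg_fp is the register whose name is "r34", the
   swap below makes rtl that still refers to HARD_FRAME_POINTER_REGNUM
   (loc79) assemble as "r34", while the reg_fp entry temporarily
   carries the old frame pointer name.  */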
2647 if (current_frame_info.reg_fp)
2649 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2650 reg_names[HARD_FRAME_POINTER_REGNUM]
2651 = reg_names[current_frame_info.reg_fp];
2652 reg_names[current_frame_info.reg_fp] = tmp;
2655 /* We don't need an alloc instruction if we've used no outputs or locals. */
2656 if (current_frame_info.n_local_regs == 0
2657 && current_frame_info.n_output_regs == 0
2658 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2659 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2661 /* If there is no alloc, but there are input registers used, then we
2662 need a .regstk directive. */
2663 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2664 ar_pfs_save_reg = NULL_RTX;
2668 current_frame_info.need_regstk = 0;
2670 if (current_frame_info.reg_save_ar_pfs)
2671 regno = current_frame_info.reg_save_ar_pfs;
2673 regno = next_scratch_gr_reg ();
2674 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2676 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2677 GEN_INT (current_frame_info.n_input_regs),
2678 GEN_INT (current_frame_info.n_local_regs),
2679 GEN_INT (current_frame_info.n_output_regs),
2680 GEN_INT (current_frame_info.n_rotate_regs)));
2681 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2684 /* Set up frame pointer, stack pointer, and spill iterators. */
2686 n_varargs = cfun->machine->n_varargs;
2687 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2688 stack_pointer_rtx, 0);
2690 if (frame_pointer_needed)
2692 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2693 RTX_FRAME_RELATED_P (insn) = 1;
2696 if (current_frame_info.total_size != 0)
2698 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2701 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2702 offset = frame_size_rtx;
2705 regno = next_scratch_gr_reg ();
2706 offset = gen_rtx_REG (DImode, regno);
2707 emit_move_insn (offset, frame_size_rtx);
2710 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2711 stack_pointer_rtx, offset));
2713 if (! frame_pointer_needed)
2715 RTX_FRAME_RELATED_P (insn) = 1;
2716 if (GET_CODE (offset) != CONST_INT)
2719 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2720 gen_rtx_SET (VOIDmode,
2722 gen_rtx_PLUS (DImode,
2729 /* ??? At this point we must generate a magic insn that appears to
2730 modify the stack pointer, the frame pointer, and all spill
2731 iterators. This would allow the most scheduling freedom. For
2732 now, just hard stop. */
2733 emit_insn (gen_blockage ());
2736 /* Must copy out ar.unat before doing any integer spills. */
2737 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2739 if (current_frame_info.reg_save_ar_unat)
2740 ar_unat_save_reg
2741 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2744 alt_regno = next_scratch_gr_reg ();
2745 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2746 current_frame_info.gr_used_mask |= 1 << alt_regno;
2749 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2750 insn = emit_move_insn (ar_unat_save_reg, reg);
2751 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2753 /* Even if we're not going to generate an epilogue, we still
2754 need to save the register so that EH works. */
2755 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2756 emit_insn (gen_prologue_use (ar_unat_save_reg));
2759 ar_unat_save_reg = NULL_RTX;
2761 /* Spill all varargs registers. Do this before spilling any GR registers,
2762 since we want the UNAT bits for the GR registers to override the UNAT
2763 bits from varargs, which we don't care about. */
2766 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2768 reg = gen_rtx_REG (DImode, regno);
2769 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2772 /* Locate the bottom of the register save area. */
2773 cfa_off = (current_frame_info.spill_cfa_off
2774 + current_frame_info.spill_size
2775 + current_frame_info.extra_spill_size);
2777 /* Save the predicate register block either in a register or in memory. */
2778 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2780 reg = gen_rtx_REG (DImode, PR_REG (0));
2781 if (current_frame_info.reg_save_pr != 0)
2783 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2784 insn = emit_move_insn (alt_reg, reg);
2786 /* ??? Denote pr spill/fill by a DImode move that modifies all
2787 64 hard registers. */
2788 RTX_FRAME_RELATED_P (insn) = 1;
2789 REG_NOTES (insn)
2790 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2791 gen_rtx_SET (VOIDmode, alt_reg, reg),
2794 /* Even if we're not going to generate an epilogue, we still
2795 need to save the register so that EH works. */
2797 emit_insn (gen_prologue_use (alt_reg));
2801 alt_regno = next_scratch_gr_reg ();
2802 alt_reg = gen_rtx_REG (DImode, alt_regno);
2803 insn = emit_move_insn (alt_reg, reg);
2804 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2809 /* Handle AR regs in numerical order. All of them get special handling. */
2810 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2811 && current_frame_info.reg_save_ar_unat == 0)
2813 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2814 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2818 /* The alloc insn already copied ar.pfs into a general register. The
2819 only thing we have to do now is copy that register to a stack slot
2820 if we'd not allocated a local register for the job. */
2821 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2822 && current_frame_info.reg_save_ar_pfs == 0)
2824 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2825 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2829 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2831 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2832 if (current_frame_info.reg_save_ar_lc != 0)
2834 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2835 insn = emit_move_insn (alt_reg, reg);
2836 RTX_FRAME_RELATED_P (insn) = 1;
2838 /* Even if we're not going to generate an epilogue, we still
2839 need to save the register so that EH works. */
2841 emit_insn (gen_prologue_use (alt_reg));
2845 alt_regno = next_scratch_gr_reg ();
2846 alt_reg = gen_rtx_REG (DImode, alt_regno);
2847 emit_move_insn (alt_reg, reg);
2848 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2853 if (current_frame_info.reg_save_gp)
2855 insn = emit_move_insn (gen_rtx_REG (DImode,
2856 current_frame_info.reg_save_gp),
2857 pic_offset_table_rtx);
2858 /* We don't know for sure yet if this is actually needed, since
2859 we've not split the PIC call patterns. If all of the calls
2860 are indirect, and not followed by any uses of the gp, then
2861 this save is dead. Allow it to go away. */
2863 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2866 /* We should now be at the base of the gr/br/fr spill area. */
2867 if (cfa_off != (current_frame_info.spill_cfa_off
2868 + current_frame_info.spill_size))
2869 abort ();
2871 /* Spill all general registers. */
2872 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2873 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2875 reg = gen_rtx_REG (DImode, regno);
2876 do_spill (gen_gr_spill, reg, cfa_off, reg);
2880 /* Handle BR0 specially -- it may be getting stored permanently in
2881 some GR register. */
2882 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2884 reg = gen_rtx_REG (DImode, BR_REG (0));
2885 if (current_frame_info.reg_save_b0 != 0)
2887 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2888 insn = emit_move_insn (alt_reg, reg);
2889 RTX_FRAME_RELATED_P (insn) = 1;
2891 /* Even if we're not going to generate an epilogue, we still
2892 need to save the register so that EH works. */
2894 emit_insn (gen_prologue_use (alt_reg));
2898 alt_regno = next_scratch_gr_reg ();
2899 alt_reg = gen_rtx_REG (DImode, alt_regno);
2900 emit_move_insn (alt_reg, reg);
2901 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2906 /* Spill the rest of the BR registers. */
2907 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2908 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2910 alt_regno = next_scratch_gr_reg ();
2911 alt_reg = gen_rtx_REG (DImode, alt_regno);
2912 reg = gen_rtx_REG (DImode, regno);
2913 emit_move_insn (alt_reg, reg);
2914 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2918 /* Align the frame and spill all FR registers. */
2919 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2920 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2924 reg = gen_rtx_REG (XFmode, regno);
2925 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2929 if (cfa_off != current_frame_info.spill_cfa_off)
2930 abort ();
2932 finish_spill_pointers ();
2935 /* Called after register allocation to add any instructions needed for the
2936 epilogue. Using an epilogue insn is favored compared to putting all of the
2937 instructions in output_function_epilogue(), since it allows the scheduler
2938 to intermix instructions with the restores of the caller saved registers. In
2939 some cases, it might be necessary to emit a barrier instruction as the last
2940 insn to prevent such scheduling. */
2943 ia64_expand_epilogue (int sibcall_p)
2945 rtx insn, reg, alt_reg, ar_unat_save_reg;
2946 int regno, alt_regno, cfa_off;
2948 ia64_compute_frame_size (get_frame_size ());
2950 /* If there is a frame pointer, then we use it instead of the stack
2951 pointer, so that the stack pointer does not need to be valid when
2952 the epilogue starts. See EXIT_IGNORE_STACK. */
2953 if (frame_pointer_needed)
2954 setup_spill_pointers (current_frame_info.n_spilled,
2955 hard_frame_pointer_rtx, 0);
2957 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2958 current_frame_info.total_size);
2960 if (current_frame_info.total_size != 0)
2962 /* ??? At this point we must generate a magic insn that appears to
2963 modify the spill iterators and the frame pointer. This would
2964 allow the most scheduling freedom. For now, just hard stop. */
2965 emit_insn (gen_blockage ());
2968 /* Locate the bottom of the register save area. */
2969 cfa_off = (current_frame_info.spill_cfa_off
2970 + current_frame_info.spill_size
2971 + current_frame_info.extra_spill_size);
2973 /* Restore the predicate registers. */
2974 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2976 if (current_frame_info.reg_save_pr != 0)
2977 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2980 alt_regno = next_scratch_gr_reg ();
2981 alt_reg = gen_rtx_REG (DImode, alt_regno);
2982 do_restore (gen_movdi_x, alt_reg, cfa_off);
2985 reg = gen_rtx_REG (DImode, PR_REG (0));
2986 emit_move_insn (reg, alt_reg);
2989 /* Restore the application registers. */
2991 /* Load the saved unat from the stack, but do not restore it until
2992 after the GRs have been restored. */
2993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2995 if (current_frame_info.reg_save_ar_unat != 0)
2996 ar_unat_save_reg
2997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3000 alt_regno = next_scratch_gr_reg ();
3001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3002 current_frame_info.gr_used_mask |= 1 << alt_regno;
3003 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3008 ar_unat_save_reg = NULL_RTX;
3010 if (current_frame_info.reg_save_ar_pfs != 0)
3012 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3013 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3014 emit_move_insn (reg, alt_reg);
3016 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3018 alt_regno = next_scratch_gr_reg ();
3019 alt_reg = gen_rtx_REG (DImode, alt_regno);
3020 do_restore (gen_movdi_x, alt_reg, cfa_off);
3022 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3023 emit_move_insn (reg, alt_reg);
3026 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3028 if (current_frame_info.reg_save_ar_lc != 0)
3029 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3032 alt_regno = next_scratch_gr_reg ();
3033 alt_reg = gen_rtx_REG (DImode, alt_regno);
3034 do_restore (gen_movdi_x, alt_reg, cfa_off);
3037 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3038 emit_move_insn (reg, alt_reg);
3041 /* We should now be at the base of the gr/br/fr spill area. */
3042 if (cfa_off != (current_frame_info.spill_cfa_off
3043 + current_frame_info.spill_size))
3044 abort ();
3046 /* The GP may be stored on the stack in the prologue, but it's
3047 never restored in the epilogue. Skip the stack slot. */
3048 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3051 /* Restore all general registers. */
3052 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3053 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3055 reg = gen_rtx_REG (DImode, regno);
3056 do_restore (gen_gr_restore, reg, cfa_off);
3060 /* Restore the branch registers. Handle B0 specially, as it may
3061 have gotten stored in some GR register. */
3062 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3064 if (current_frame_info.reg_save_b0 != 0)
3065 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3068 alt_regno = next_scratch_gr_reg ();
3069 alt_reg = gen_rtx_REG (DImode, alt_regno);
3070 do_restore (gen_movdi_x, alt_reg, cfa_off);
3073 reg = gen_rtx_REG (DImode, BR_REG (0));
3074 emit_move_insn (reg, alt_reg);
3077 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3078 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3080 alt_regno = next_scratch_gr_reg ();
3081 alt_reg = gen_rtx_REG (DImode, alt_regno);
3082 do_restore (gen_movdi_x, alt_reg, cfa_off);
3084 reg = gen_rtx_REG (DImode, regno);
3085 emit_move_insn (reg, alt_reg);
3088 /* Restore floating point registers. */
3089 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3090 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3094 reg = gen_rtx_REG (XFmode, regno);
3095 do_restore (gen_fr_restore_x, reg, cfa_off);
3099 /* Restore ar.unat for real. */
3100 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3102 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3103 emit_move_insn (reg, ar_unat_save_reg);
3106 if (cfa_off != current_frame_info.spill_cfa_off)
3107 abort ();
3109 finish_spill_pointers ();
3111 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3113 /* ??? At this point we must generate a magic insn that appears to
3114 modify the spill iterators, the stack pointer, and the frame
3115 pointer. This would allow the most scheduling freedom. For now,
3117 emit_insn (gen_blockage ());
3120 if (cfun->machine->ia64_eh_epilogue_sp)
3121 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3122 else if (frame_pointer_needed)
3124 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3125 RTX_FRAME_RELATED_P (insn) = 1;
3127 else if (current_frame_info.total_size)
3129 rtx offset, frame_size_rtx;
3131 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3132 if (CONST_OK_FOR_I (current_frame_info.total_size))
3133 offset = frame_size_rtx;
3136 regno = next_scratch_gr_reg ();
3137 offset = gen_rtx_REG (DImode, regno);
3138 emit_move_insn (offset, frame_size_rtx);
3141 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3144 RTX_FRAME_RELATED_P (insn) = 1;
3145 if (GET_CODE (offset) != CONST_INT)
3148 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3149 gen_rtx_SET (VOIDmode,
3151 gen_rtx_PLUS (DImode,
3158 if (cfun->machine->ia64_eh_epilogue_bsp)
3159 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3162 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3165 int fp = GR_REG (2);
3166 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
3167 first available call clobbered register. If there was a frame_pointer
3168 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3169 so we have to make sure we're using the string "r2" when emitting
3170 the register name for the assembler. */
3171 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3172 fp = HARD_FRAME_POINTER_REGNUM;
3174 /* We must emit an alloc to force the input registers to become output
3175 registers. Otherwise, if the callee tries to pass its parameters
3176 through to another call without an intervening alloc, then these
3177 parameters are undefined. */
3178 /* ??? We don't need to preserve all input registers. We only need to
3179 preserve those input registers used as arguments to the sibling call.
3180 It is unclear how to compute that number here. */
3181 if (current_frame_info.n_input_regs != 0)
3182 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3183 GEN_INT (0), GEN_INT (0),
3184 GEN_INT (current_frame_info.n_input_regs),
3189 /* Return 1 if br.ret can do all the work required to return from a
3190 function. */
3193 ia64_direct_return (void)
3195 if (reload_completed && ! frame_pointer_needed)
3197 ia64_compute_frame_size (get_frame_size ());
3199 return (current_frame_info.total_size == 0
3200 && current_frame_info.n_spilled == 0
3201 && current_frame_info.reg_save_b0 == 0
3202 && current_frame_info.reg_save_pr == 0
3203 && current_frame_info.reg_save_ar_pfs == 0
3204 && current_frame_info.reg_save_ar_unat == 0
3205 && current_frame_info.reg_save_ar_lc == 0);
3210 /* Return the magic cookie that we use to hold the return address
3211 during early compilation. */
3214 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3218 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3221 /* Split this value after reload, now that we know where the return
3222 address is saved. */
3225 ia64_split_return_addr_rtx (rtx dest)
3229 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3231 if (current_frame_info.reg_save_b0 != 0)
3232 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3238 /* Compute offset from CFA for BR0. */
3239 /* ??? Must be kept in sync with ia64_expand_prologue. */
3240 off = (current_frame_info.spill_cfa_off
3241 + current_frame_info.spill_size);
3242 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3243 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3246 /* Convert CFA offset to a register based offset. */
3247 if (frame_pointer_needed)
3248 src = hard_frame_pointer_rtx;
3251 src = stack_pointer_rtx;
3252 off += current_frame_info.total_size;
3255 /* Load address into scratch register. */
3256 if (CONST_OK_FOR_I (off))
3257 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3260 emit_move_insn (dest, GEN_INT (off));
3261 emit_insn (gen_adddi3 (dest, src, dest));
3264 src = gen_rtx_MEM (Pmode, dest);
3268 src = gen_rtx_REG (DImode, BR_REG (0));
3270 emit_move_insn (dest, src);
3274 ia64_hard_regno_rename_ok (int from, int to)
3276 /* Don't clobber any of the registers we reserved for the prologue. */
3277 if (to == current_frame_info.reg_fp
3278 || to == current_frame_info.reg_save_b0
3279 || to == current_frame_info.reg_save_pr
3280 || to == current_frame_info.reg_save_ar_pfs
3281 || to == current_frame_info.reg_save_ar_unat
3282 || to == current_frame_info.reg_save_ar_lc)
3285 if (from == current_frame_info.reg_fp
3286 || from == current_frame_info.reg_save_b0
3287 || from == current_frame_info.reg_save_pr
3288 || from == current_frame_info.reg_save_ar_pfs
3289 || from == current_frame_info.reg_save_ar_unat
3290 || from == current_frame_info.reg_save_ar_lc)
3293 /* Don't use output registers outside the register frame. */
3294 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3297 /* Retain even/oddness on predicate register pairs. */
3298 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3299 return (from & 1) == (to & 1);
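/* For example: renaming p6 to p8 is allowed (parity preserved), but
   p6 to p7 is rejected, presumably because a compare writes an
   even/odd pair of predicates together.  */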
3304 /* Target hook for assembling integer objects. Handle word-sized
3305 aligned objects and detect the cases when @fptr is needed. */
3308 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3310 if (size == POINTER_SIZE / BITS_PER_UNIT
3312 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3313 && GET_CODE (x) == SYMBOL_REF
3314 && SYMBOL_REF_FUNCTION_P (x))
3316 if (POINTER_SIZE == 32)
3317 fputs ("\tdata4\t@fptr(", asm_out_file);
3319 fputs ("\tdata8\t@fptr(", asm_out_file);
3320 output_addr_const (asm_out_file, x);
3321 fputs (")\n", asm_out_file);
3324 return default_assemble_integer (x, size, aligned_p);
3327 /* Emit the function prologue. */
3330 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3332 int mask, grsave, grsave_prev;
3334 if (current_frame_info.need_regstk)
3335 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3336 current_frame_info.n_input_regs,
3337 current_frame_info.n_local_regs,
3338 current_frame_info.n_output_regs,
3339 current_frame_info.n_rotate_regs);
3341 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3344 /* Emit the .prologue directive. */
3347 grsave = grsave_prev = 0;
3348 if (current_frame_info.reg_save_b0 != 0)
3351 grsave = grsave_prev = current_frame_info.reg_save_b0;
3353 if (current_frame_info.reg_save_ar_pfs != 0
3354 && (grsave_prev == 0
3355 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3358 if (grsave_prev == 0)
3359 grsave = current_frame_info.reg_save_ar_pfs;
3360 grsave_prev = current_frame_info.reg_save_ar_pfs;
3362 if (current_frame_info.reg_fp != 0
3363 && (grsave_prev == 0
3364 || current_frame_info.reg_fp == grsave_prev + 1))
3367 if (grsave_prev == 0)
3368 grsave = HARD_FRAME_POINTER_REGNUM;
3369 grsave_prev = current_frame_info.reg_fp;
3371 if (current_frame_info.reg_save_pr != 0
3372 && (grsave_prev == 0
3373 || current_frame_info.reg_save_pr == grsave_prev + 1))
3376 if (grsave_prev == 0)
3377 grsave = current_frame_info.reg_save_pr;
3380 if (mask && TARGET_GNU_AS)
3381 fprintf (file, "\t.prologue %d, %d\n", mask,
3382 ia64_dbx_register_number (grsave));
3384 fputs ("\t.prologue\n", file);
3386 /* Emit a .spill directive, if necessary, to relocate the base of
3387 the register spill area. */
3388 if (current_frame_info.spill_cfa_off != -16)
3389 fprintf (file, "\t.spill %ld\n",
3390 (long) (current_frame_info.spill_cfa_off
3391 + current_frame_info.spill_size));
3394 /* Emit the .body directive at the scheduled end of the prologue. */
3397 ia64_output_function_end_prologue (FILE *file)
3399 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3402 fputs ("\t.body\n", file);
3405 /* Emit the function epilogue. */
3408 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3409 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3413 if (current_frame_info.reg_fp)
3415 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3416 reg_names[HARD_FRAME_POINTER_REGNUM]
3417 = reg_names[current_frame_info.reg_fp];
3418 reg_names[current_frame_info.reg_fp] = tmp;
3420 if (! TARGET_REG_NAMES)
3422 for (i = 0; i < current_frame_info.n_input_regs; i++)
3423 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3424 for (i = 0; i < current_frame_info.n_local_regs; i++)
3425 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3426 for (i = 0; i < current_frame_info.n_output_regs; i++)
3427 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3430 current_frame_info.initialized = 0;
3434 ia64_dbx_register_number (int regno)
3436 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3437 from its home at loc79 to something inside the register frame. We
3438 must perform the same renumbering here for the debug info. */
3439 if (current_frame_info.reg_fp)
3441 if (regno == HARD_FRAME_POINTER_REGNUM)
3442 regno = current_frame_info.reg_fp;
3443 else if (regno == current_frame_info.reg_fp)
3444 regno = HARD_FRAME_POINTER_REGNUM;
3447 if (IN_REGNO_P (regno))
3448 return 32 + regno - IN_REG (0);
3449 else if (LOC_REGNO_P (regno))
3450 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3451 else if (OUT_REGNO_P (regno))
3452 return (32 + current_frame_info.n_input_regs
3453 + current_frame_info.n_local_regs + regno - OUT_REG (0));
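/* Worked example of the mapping above: with 2 input and 3 local
   registers, in0-in1 map to debug register numbers 32-33, loc0-loc2
   to 34-36, and out0 to 37.  */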
3459 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3461 rtx addr_reg, eight = GEN_INT (8);
3463 /* The Intel assembler requires that the global __ia64_trampoline symbol
3464 be declared explicitly */
3467 static bool declared_ia64_trampoline = false;
3469 if (!declared_ia64_trampoline)
3471 declared_ia64_trampoline = true;
3472 (*targetm.asm_out.globalize_label) (asm_out_file,
3473 "__ia64_trampoline");
3477 /* Load up our iterator. */
3478 addr_reg = gen_reg_rtx (Pmode);
3479 emit_move_insn (addr_reg, addr);
3481 /* The first two words are the fake descriptor:
3482 __ia64_trampoline, ADDR+16. */
3483 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3484 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3485 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3487 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3488 copy_to_reg (plus_constant (addr, 16)));
3489 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3491 /* The third word is the target descriptor. */
3492 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3493 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3495 /* The fourth word is the static chain. */
3496 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
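/* The trampoline built above occupies 32 bytes (one word = 8 bytes):
   word 0: __ia64_trampoline (fake descriptor: entry address)
   word 1: ADDR + 16 (fake descriptor: "gp" value)
   word 2: FNADDR (the real target's descriptor)
   word 3: STATIC_CHAIN
   Calling through the fake descriptor therefore enters
   __ia64_trampoline with gp pointing at words 2 and 3.  */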
3499 /* Do any needed setup for a variadic function. CUM has not been updated
3500 for the last named argument which has type TYPE and mode MODE.
3502 We generate the actual spill instructions during prologue generation. */
3505 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3506 tree type, int * pretend_size,
3507 int second_time ATTRIBUTE_UNUSED)
3509 CUMULATIVE_ARGS next_cum = *cum;
3511 /* Skip the current argument. */
3512 ia64_function_arg_advance (&next_cum, mode, type, 1);
3514 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3516 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3517 *pretend_size = n * UNITS_PER_WORD;
3518 cfun->machine->n_varargs = n;
3522 /* Check whether TYPE is a homogeneous floating point aggregate. If
3523 it is, return the mode of the floating point type that appears
3524 in all leaves. If it is not, return VOIDmode.
3526 An aggregate is a homogeneous floating point aggregate if all
3527 fields/elements in it have the same floating point type (e.g.,
3528 SFmode). 128-bit quad-precision floats are excluded. */
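/* For example: struct { float x, y, z; } is an HFA with element mode
   SFmode, and struct { double d[2]; } one with element mode DFmode,
   but struct { float f; double d; } is not an HFA because its leaf
   types differ.  */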
3530 static enum machine_mode
3531 hfa_element_mode (tree type, int nested)
3533 enum machine_mode element_mode = VOIDmode;
3534 enum machine_mode mode;
3535 enum tree_code code = TREE_CODE (type);
3536 int know_element_mode = 0;
3541 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3542 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3543 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3544 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3548 /* Fortran complex types are supposed to be HFAs, so we need to handle
3549 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3550 types. */
3552 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3553 && TYPE_MODE (type) != TCmode)
3554 return GET_MODE_INNER (TYPE_MODE (type));
3559 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3560 mode if this is contained within an aggregate. */
3561 if (nested && TYPE_MODE (type) != TFmode)
3562 return TYPE_MODE (type);
3567 return hfa_element_mode (TREE_TYPE (type), 1);
3571 case QUAL_UNION_TYPE:
3572 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3574 if (TREE_CODE (t) != FIELD_DECL)
3577 mode = hfa_element_mode (TREE_TYPE (t), 1);
3578 if (know_element_mode)
3580 if (mode != element_mode)
3583 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3587 know_element_mode = 1;
3588 element_mode = mode;
3591 return element_mode;
3594 /* If we reach here, we probably have some front-end specific type
3595 that the backend doesn't know about. This can happen via the
3596 aggregate_value_p call in init_function_start. All we can do is
3597 ignore unknown tree types. */
3604 /* Return the number of words required to hold a quantity of TYPE and MODE
3605 when passed as an argument. */
3607 ia64_function_arg_words (tree type, enum machine_mode mode)
3611 if (mode == BLKmode)
3612 words = int_size_in_bytes (type);
3613 else
3614 words = GET_MODE_SIZE (mode);
3616 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
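/* For example, with 8-byte words: a 12-byte BLKmode aggregate needs
   (12 + 7) / 8 = 2 argument words, while an 8-byte DImode value needs
   exactly 1.  */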
3619 /* Return the number of registers that should be skipped so the current
3620 argument (described by TYPE and WORDS) will be properly aligned.
3622 Integer and float arguments larger than 8 bytes start at the next
3623 even boundary. Aggregates larger than 8 bytes start at the next
3624 even boundary if the aggregate has 16 byte alignment. Note that
3625 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3626 but are still to be aligned in registers.
3628 ??? The ABI does not specify how to handle aggregates with
3629 alignment from 9 to 15 bytes, or greater than 16. We handle them
3630 all as if they had 16 byte alignment. Such aggregates can occur
3631 only if gcc extensions are used. */
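/* For example: a 16-byte-aligned aggregate arriving when cum->words
   is odd is given offset 1 so that it starts on the next even slot;
   the same aggregate arriving at an even cum->words needs no
   padding.  */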
3633 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3635 if ((cum->words & 1) == 0)
3639 && TREE_CODE (type) != INTEGER_TYPE
3640 && TREE_CODE (type) != REAL_TYPE)
3641 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3646 /* Return rtx for register where argument is passed, or zero if it is passed
3647 in memory. */
3648 /* ??? 128-bit quad-precision floats are always passed in general
3649 registers. */
3652 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3653 int named, int incoming)
3655 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3656 int words = ia64_function_arg_words (type, mode);
3657 int offset = ia64_function_arg_offset (cum, type, words);
3658 enum machine_mode hfa_mode = VOIDmode;
3660 /* If all argument slots are used, then it must go on the stack. */
3661 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3664 /* Check for and handle homogeneous FP aggregates. */
3666 hfa_mode = hfa_element_mode (type, 0);
3668 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3669 and unprototyped hfas are passed specially. */
3670 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3674 int fp_regs = cum->fp_regs;
3675 int int_regs = cum->words + offset;
3676 int hfa_size = GET_MODE_SIZE (hfa_mode);
3680 /* If prototyped, pass it in FR regs then GR regs.
3681 If not prototyped, pass it in both FR and GR regs.
3683 If this is an SFmode aggregate, then it is possible to run out of
3684 FR regs while GR regs are still left. In that case, we pass the
3685 remaining part in the GR regs. */
3687 /* Fill the FP regs. We do this always. We stop if we reach the end
3688 of the argument, the last FP register, or the last argument slot. */
3690 byte_size = ((mode == BLKmode)
3691 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3692 args_byte_size = int_regs * UNITS_PER_WORD;
3694 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3695 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3697 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3698 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3702 args_byte_size += hfa_size;
3706 /* If no prototype, then the whole thing must go in GR regs. */
3707 if (! cum->prototype)
3708 offset = 0;
3709 /* If this is an SFmode aggregate, then we might have some left over
3710 that needs to go in GR regs. */
3711 else if (byte_size != offset)
3712 int_regs += offset / UNITS_PER_WORD;
3714 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3716 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3718 enum machine_mode gr_mode = DImode;
3719 unsigned int gr_size;
3721 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3722 then this goes in a GR reg left adjusted/little endian, right
3723 adjusted/big endian. */
3724 /* ??? Currently this is handled wrong, because 4-byte hunks are
3725 always right adjusted/little endian. */
3728 /* If we have an even 4 byte hunk because the aggregate is a
3729 multiple of 4 bytes in size, then this goes in a GR reg right
3730 adjusted/little endian. */
3731 else if (byte_size - offset == 4)
3734 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3735 gen_rtx_REG (gr_mode, (basereg
3739 gr_size = GET_MODE_SIZE (gr_mode);
3741 if (gr_size == UNITS_PER_WORD
3742 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3744 else if (gr_size > UNITS_PER_WORD)
3745 int_regs += gr_size / UNITS_PER_WORD;
3748 /* If we ended up using just one location, just return that one loc, but
3749 change the mode back to the argument mode. */
3750 if (i == 1)
3751 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3752 else
3753 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3756 /* Integral and aggregates go in general registers. If we have run out of
3757 FR registers, then FP values must also go in general registers. This can
3758 happen when we have a SFmode HFA. */
3759 else if (mode == TFmode || mode == TCmode
3760 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3762 int byte_size = ((mode == BLKmode)
3763 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3764 if (BYTES_BIG_ENDIAN
3765 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3766 && byte_size < UNITS_PER_WORD
3769 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3770 gen_rtx_REG (DImode,
3771 (basereg + cum->words
3774 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3777 return gen_rtx_REG (mode, basereg + cum->words + offset);
3781 /* If there is a prototype, then FP values go in a FR register when
3782 named, and in a GR register when unnamed. */
3783 else if (cum->prototype)
3784 {
3785 if (! named)
3786 return gen_rtx_REG (mode, basereg + cum->words + offset);
3787 else
3788 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3789 }
3790 /* If there is no prototype, then FP values go in both FR and GR
3791 registers. */
3794 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3795 gen_rtx_REG (mode, (FR_ARG_FIRST
3798 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3800 (basereg + cum->words
3804 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3808 /* Return number of words, at the beginning of the argument, that must be
3809 put in registers. 0 if the argument is entirely in registers or entirely
3810 in memory. */
3813 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3814 tree type, int named ATTRIBUTE_UNUSED)
3816 int words = ia64_function_arg_words (type, mode);
3817 int offset = ia64_function_arg_offset (cum, type, words);
3819 /* If all argument slots are used, then it must go on the stack. */
3820 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3823 /* It doesn't matter whether the argument goes in FR or GR regs. If
3824 it fits within the 8 argument slots, then it goes entirely in
3825 registers. If it extends past the last argument slot, then the rest
3826 goes on the stack. */
3828 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3831 return MAX_ARGUMENT_SLOTS - cum->words - offset;
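/* For example: with the 8 argument slots, an argument needing 4 words
   that arrives at cum->words == 6 (offset 0) returns 2: two words
   travel in registers and the remaining two go on the stack.  */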
3834 /* Update CUM to point after this argument. This is patterned after
3835 ia64_function_arg. */
3838 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3839 tree type, int named)
3841 int words = ia64_function_arg_words (type, mode);
3842 int offset = ia64_function_arg_offset (cum, type, words);
3843 enum machine_mode hfa_mode = VOIDmode;
3845 /* If all arg slots are already full, then there is nothing to do. */
3846 if (cum->words >= MAX_ARGUMENT_SLOTS)
3849 cum->words += words + offset;
3851 /* Check for and handle homogeneous FP aggregates. */
3853 hfa_mode = hfa_element_mode (type, 0);
3855 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3856 and unprototyped hfas are passed specially. */
3857 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3859 int fp_regs = cum->fp_regs;
3860 /* This is the original value of cum->words + offset. */
3861 int int_regs = cum->words - words;
3862 int hfa_size = GET_MODE_SIZE (hfa_mode);
3866 /* If prototyped, pass it in FR regs then GR regs.
3867 If not prototyped, pass it in both FR and GR regs.
3869 If this is an SFmode aggregate, then it is possible to run out of
3870 FR regs while GR regs are still left. In that case, we pass the
3871 remaining part in the GR regs. */
3873 /* Fill the FP regs. We do this always. We stop if we reach the end
3874 of the argument, the last FP register, or the last argument slot. */
3876 byte_size = ((mode == BLKmode)
3877 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3878 args_byte_size = int_regs * UNITS_PER_WORD;
3880 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3881 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3884 args_byte_size += hfa_size;
3888 cum->fp_regs = fp_regs;
3891 /* Integral and aggregates go in general registers. If we have run out of
3892 FR registers, then FP values must also go in general registers. This can
3893 happen when we have a SFmode HFA. */
3894 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3895 cum->int_regs = cum->words;
3897 /* If there is a prototype, then FP values go in a FR register when
3898 named, and in a GR register when unnamed. */
3899 else if (cum->prototype)
3900 {
3901 if (! named)
3902 cum->int_regs = cum->words;
3903 else
3904 /* ??? Complex types should not reach here. */
3905 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3906 }
3907 /* If there is no prototype, then FP values go in both FR and GR
3908 registers. */
3911 /* ??? Complex types should not reach here. */
3912 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3913 cum->int_regs = cum->words;
3917 /* Variable sized types are passed by reference. */
3918 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3921 ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3922 enum machine_mode mode ATTRIBUTE_UNUSED,
3923 tree type, int named ATTRIBUTE_UNUSED)
3925 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3928 /* True if it is OK to do sibling call optimization for the specified
3929 call expression EXP. DECL will be the called function, or NULL if
3930 this is an indirect call. */
3932 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3934 /* We must always return with our current GP. This means we can
3935 only sibcall to functions defined in the current module. */
3936 return decl && (*targetm.binds_local_p) (decl);
3940 /* Implement va_arg. */
3943 ia64_va_arg (tree valist, tree type)
3947 /* Variable sized types are passed by reference. */
3948 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3950 rtx addr = force_reg (ptr_mode,
3951 std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3952 #ifdef POINTERS_EXTEND_UNSIGNED
3953 addr = convert_memory_address (Pmode, addr);
3955 return gen_rtx_MEM (ptr_mode, addr);
3958 /* Aggregate arguments with alignment larger than 8 bytes start at
3959 the next even boundary. Integer and floating point arguments
3960 do so if they are larger than 8 bytes, whether or not they are
3961 also aligned larger than 8 bytes. */
3962 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3963 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3965 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3966 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3967 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3968 build_int_2 (-2 * UNITS_PER_WORD, -1));
3969 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3970 TREE_SIDE_EFFECTS (t) = 1;
3971 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
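/* Worked example of the rounding above, with UNITS_PER_WORD == 8: a
   valist of 24 becomes (24 + 15) & -16 = 32, the next 16-byte
   boundary, while a valist already at 32 is left unchanged.  */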
3974 return std_expand_builtin_va_arg (valist, type);
3977 /* Return 1 if function return value returned in memory. Return 0 if it is
3981 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3983 enum machine_mode mode;
3984 enum machine_mode hfa_mode;
3985 HOST_WIDE_INT byte_size;
3987 mode = TYPE_MODE (valtype);
3988 byte_size = GET_MODE_SIZE (mode);
3989 if (mode == BLKmode)
3991 byte_size = int_size_in_bytes (valtype);
3996 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3998 hfa_mode = hfa_element_mode (valtype, 0);
3999 if (hfa_mode != VOIDmode)
4001 int hfa_size = GET_MODE_SIZE (hfa_mode);
4003 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4008 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4014 /* Return rtx for register that holds the function return value. */
4017 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4019 enum machine_mode mode;
4020 enum machine_mode hfa_mode;
4022 mode = TYPE_MODE (valtype);
4023 hfa_mode = hfa_element_mode (valtype, 0);
4025 if (hfa_mode != VOIDmode)
4033 hfa_size = GET_MODE_SIZE (hfa_mode);
4034 byte_size = ((mode == BLKmode)
4035 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4037 for (i = 0; offset < byte_size; i++)
4039 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4040 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4045 if (i == 1)
4046 return XEXP (loc[0], 0);
4047 else
4048 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4050 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4051 return gen_rtx_REG (mode, FR_ARG_FIRST);
4054 if (BYTES_BIG_ENDIAN
4055 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4063 bytesize = int_size_in_bytes (valtype);
4064 for (i = 0; offset < bytesize; i++)
4066 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4067 gen_rtx_REG (DImode,
4070 offset += UNITS_PER_WORD;
4072 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4075 return gen_rtx_REG (mode, GR_RET_FIRST);
4079 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4080 We need to emit DTP-relative relocations. */
4083 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4087 fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
4092 /* Print a memory address as an operand to reference that memory location. */
4094 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4095 also call this from ia64_print_operand for memory addresses. */
4098 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4099 rtx address ATTRIBUTE_UNUSED)
4103 /* Print an operand to an assembler instruction.
4104 C Swap and print a comparison operator.
4105 D Print an FP comparison operator.
4106 E Print 32 - constant, for SImode shifts as extract.
4107 e Print 64 - constant, for DImode rotates.
4108 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4109 a floating point register emitted normally.
4110 I Invert a predicate register by adding 1.
4111 J Select the proper predicate register for a condition.
4112 j Select the inverse predicate register for a condition.
4113 O Append .acq for volatile load.
4114 P Postincrement of a MEM.
4115 Q Append .rel for volatile store.
4116 S Shift amount for shladd instruction.
4117 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4118 for Intel assembler.
4119 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4120 for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.  */
4124 ia64_print_operand (FILE * file, rtx x, int code)
4131 /* Handled below. */
4136 enum rtx_code c = swap_condition (GET_CODE (x));
4137 fputs (GET_RTX_NAME (c), file);
4142 switch (GET_CODE (x))
4154 str = GET_RTX_NAME (GET_CODE (x));
4161 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4165 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4169 if (x == CONST0_RTX (GET_MODE (x)))
4170 str = reg_names [FR_REG (0)];
4171 else if (x == CONST1_RTX (GET_MODE (x)))
4172 str = reg_names [FR_REG (1)];
4173 else if (GET_CODE (x) == REG)
4174 str = reg_names [REGNO (x)];
4181 fputs (reg_names [REGNO (x) + 1], file);
4187 unsigned int regno = REGNO (XEXP (x, 0));
4188 if (GET_CODE (x) == EQ)
4192 fputs (reg_names [regno], file);
4197 if (MEM_VOLATILE_P (x))
4198 fputs(".acq", file);
4203 HOST_WIDE_INT value;
4205 switch (GET_CODE (XEXP (x, 0)))
4211 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4212 if (GET_CODE (x) == CONST_INT)
4214 else if (GET_CODE (x) == REG)
4216 fprintf (file, ", %s", reg_names[REGNO (x)]);
4224 value = GET_MODE_SIZE (GET_MODE (x));
4228 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4232 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4237 if (MEM_VOLATILE_P (x))
4238 fputs(".rel", file);
4242 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4246 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4248 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4254 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4256 const char *prefix = "0x";
4257 if (INTVAL (x) & 0x80000000)
4259 fprintf (file, "0xffffffff");
4262 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4268 /* If this operand is the constant zero, write it as register zero.
4269 Any register, zero, or CONST_INT value is OK here. */
4270 if (GET_CODE (x) == REG)
4271 fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
4274 else if (GET_CODE (x) == CONST_INT)
4275 output_addr_const (file, x);
4277 output_operand_lossage ("invalid %%r value");
4284 /* For conditional branches, returns or calls, substitute
4285 sptk, dptk, dpnt, or spnt for %s. */
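      /* (Illustrative note, not from the original source: with the usual
	 REG_BR_PROB_BASE of 10000, the elided hint selection below picks
	 the static not-taken hint for pred_val below 200, the dynamic
	 not-taken hint below the midpoint, the dynamic taken hint up to
	 9800, and the static taken hint above that.)  */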
4286 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4289 int pred_val = INTVAL (XEXP (x, 0));
4291 /* Guess top and bottom 10% statically predicted. */
4292 if (pred_val < REG_BR_PROB_BASE / 50)
4294 else if (pred_val < REG_BR_PROB_BASE / 2)
4296 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4301 else if (GET_CODE (current_output_insn) == CALL_INSN)
4306 fputs (which, file);
4311 x = current_insn_predicate;
4314 unsigned int regno = REGNO (XEXP (x, 0));
4315 if (GET_CODE (x) == EQ)
4317 fprintf (file, "(%s) ", reg_names [regno]);
4322 output_operand_lossage ("ia64_print_operand: unknown code");
4326 switch (GET_CODE (x))
4328 /* This happens for the spill/restore instructions. */
4333 /* ... fall through ... */
4336 fputs (reg_names [REGNO (x)], file);
4341 rtx addr = XEXP (x, 0);
4342 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4343 addr = XEXP (addr, 0);
4344 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4349 output_addr_const (file, x);
4356 /* Compute a (partial) cost for rtx X. Return true if the complete
4357 cost has been computed, and false if subexpressions should be
4358 scanned. In either case, *TOTAL contains the cost result. */
4359 /* ??? This is incomplete. */
4362 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4370 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4373 if (CONST_OK_FOR_I (INTVAL (x)))
4375 else if (CONST_OK_FOR_J (INTVAL (x)))
4378 *total = COSTS_N_INSNS (1);
4381 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4384 *total = COSTS_N_INSNS (1);
4389 *total = COSTS_N_INSNS (1);
4395 *total = COSTS_N_INSNS (3);
4399 /* For multiplies wider than HImode, we have to go to the FPU,
4400 which normally involves copies. Plus there's the latency
4401 of the multiply itself, and the latency of the instructions to
4402 transfer integer regs to FP regs. */
4403 /* ??? Check for FP mode. */
4404 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4405 *total = COSTS_N_INSNS (10);
4407 *total = COSTS_N_INSNS (2);
4415 *total = COSTS_N_INSNS (1);
4422 /* We make divide expensive, so that divide-by-constant will be
4423 optimized to a multiply. */
4424 *total = COSTS_N_INSNS (60);
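      /* (Illustrative note, not from the original source: at a cost of
	 60, a division such as x / 3 is profitably rewritten by the
	 middle end as a multiply by a precomputed reciprocal "magic"
	 constant plus shifts, each of which costs only around
	 COSTS_N_INSNS (1) by the cases above.)  */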
4432 /* Calculate the cost of moving data from a register in class FROM to
4433 one in class TO, using MODE. */
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
			 enum reg_class to)
{
4439 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;
4445 /* All costs are symmetric, so reduce cases by putting the
4446 lower number class as the destination. */
4449 enum reg_class tmp = to;
4450 to = from, from = tmp;
4453 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4454 so that we get secondary memory reloads. Between FR_REGS,
4455 we have to make this at least as expensive as MEMORY_MOVE_COST
4456 to avoid spectacularly poor register class preferencing. */
4459 if (to != GR_REGS || from != GR_REGS)
4460 return MEMORY_MOVE_COST (mode, to, 0);
4468 /* Moving between PR registers takes two insns. */
4469 if (from == PR_REGS)
4471 /* Moving between PR and anything but GR is impossible. */
4472 if (from != GR_REGS)
4473 return MEMORY_MOVE_COST (mode, to, 0);
4477 /* Moving between BR and anything but GR is impossible. */
4478 if (from != GR_REGS && from != GR_AND_BR_REGS)
4479 return MEMORY_MOVE_COST (mode, to, 0);
4484 /* Moving between AR and anything but GR is impossible. */
4485 if (from != GR_REGS)
4486 return MEMORY_MOVE_COST (mode, to, 0);
4491 case GR_AND_FR_REGS:
4492 case GR_AND_BR_REGS:
4503 /* This function returns the register class required for a secondary
4504 register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */
4509 ia64_secondary_reload_class (enum reg_class class,
4510 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4514 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4515 regno = true_regnum (x);
4522 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4523 interaction. We end up with two pseudos with overlapping lifetimes
     both of which are equiv to the same constant, and both of which need
4525 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4526 changes depending on the path length, which means the qty_first_reg
4527 check in make_regs_eqv can give different answers at different times.
     At some point I'll probably need a reload_indi pattern to handle
     this.
4531 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4532 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4533 non-general registers for good measure. */
4534 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
  /* This is needed if a pseudo used as a call_operand gets spilled to a
     stack slot.  */
4539 if (GET_CODE (x) == MEM)
4544 /* Need to go through general registers to get to other class regs. */
4545 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
  /* This can happen when a paradoxical subreg is an operand to the
     muldi3 pattern.  */
4550 /* ??? This shouldn't be necessary after instruction scheduling is
4551 enabled, because paradoxical subregs are not accepted by
4552 register_operand when INSN_SCHEDULING is defined. Or alternatively,
     stop the paradoxical subreg stupidity in the *_operand functions
     in reload.  */
4555 if (GET_CODE (x) == MEM
4556 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4557 || GET_MODE (x) == QImode))
4560 /* This can happen because of the ior/and/etc patterns that accept FP
4561 registers as operands. If the third operand is a constant, then it
4562 needs to be reloaded into a FP register. */
4563 if (GET_CODE (x) == CONST_INT)
4566 /* This can happen because of register elimination in a muldi3 insn.
4567 E.g. `26107 * (unsigned long)&u'. */
4568 if (GET_CODE (x) == PLUS)
4573 /* ??? This happens if we cse/gcse a BImode value across a call,
4574 and the function has a nonlocal goto. This is because global
4575 does not allocate call crossing pseudos to hard registers when
4576 current_function_has_nonlocal_goto is true. This is relatively
4577 common for C++ programs that use exceptions. To reproduce,
4578 return NO_REGS and compile libstdc++. */
4579 if (GET_CODE (x) == MEM)
4582 /* This can happen when we take a BImode subreg of a DImode value,
4583 and that DImode value winds up in some non-GR register. */
4584 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4596 /* Emit text to declare externally defined variables and functions, because
4597 the Intel assembler does not support undefined externals. */
4600 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4602 int save_referenced;
4604 /* GNU as does not need anything here, but the HP linker does need
4605 something for external functions. */
4609 || TREE_CODE (decl) != FUNCTION_DECL
4610 || strstr (name, "__builtin_") == name))
4613 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4614 the linker when we do this, so we need to be careful not to do this for
4615 builtin functions which have no library equivalent. Unfortunately, we
4616 can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them.  */
4619 if (! strcmp (name, "__builtin_next_arg")
4620 || ! strcmp (name, "alloca")
4621 || ! strcmp (name, "__builtin_constant_p")
4622 || ! strcmp (name, "__builtin_args_info"))
4626 ia64_hpux_add_extern_decl (name);
  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
4631 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4632 if (TREE_CODE (decl) == FUNCTION_DECL)
4633 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4634 (*targetm.asm_out.globalize_label) (file, name);
4635 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4639 /* Parse the -mfixed-range= option string. */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4648 REG2 are either register names or register numbers. The effect
4649 of this option is to mark the registers in the range from REG1 to
4650 REG2 as ``fixed'' so they won't be used by the compiler. This is
4651 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4653 i = strlen (const_str);
4654 str = (char *) alloca (i + 1);
4655 memcpy (str, const_str, i + 1);
4659 dash = strchr (str, '-');
4662 warning ("value of -mfixed-range must have form REG1-REG2");
4667 comma = strchr (dash + 1, ',');
4671 first = decode_reg_name (str);
4674 warning ("unknown register name: %s", str);
4678 last = decode_reg_name (dash + 1);
4681 warning ("unknown register name: %s", dash + 1);
4689 warning ("%s-%s is an empty range", str, dash + 1);
4693 for (i = first; i <= last; ++i)
4694 fixed_regs[i] = call_used_regs[i] = 1;
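  /* Usage example (illustrative): -mfixed-range=f32-f127 marks the upper
     floating-point register file as fixed, and several ranges may be
     given at once, e.g. -mfixed-range=f12-f15,f32-f127; the elided loop
     continuation processes one comma-separated range per iteration.  */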
4704 static struct machine_function *
4705 ia64_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}
4710 /* Handle TARGET_OPTIONS switches. */
4713 ia64_override_options (void)
4717 const char *const name; /* processor name or nickname. */
4718 const enum processor_type processor;
4720 const processor_alias_table[] =
4722 {"itanium", PROCESSOR_ITANIUM},
4723 {"itanium1", PROCESSOR_ITANIUM},
4724 {"merced", PROCESSOR_ITANIUM},
4725 {"itanium2", PROCESSOR_ITANIUM2},
4726 {"mckinley", PROCESSOR_ITANIUM2},
4729 int const pta_size = ARRAY_SIZE (processor_alias_table);
4732 if (TARGET_AUTO_PIC)
4733 target_flags |= MASK_CONST_GP;
4735 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4737 warning ("cannot optimize floating point division for both latency and throughput");
4738 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4741 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4743 warning ("cannot optimize integer division for both latency and throughput");
4744 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4747 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4749 warning ("cannot optimize square root for both latency and throughput");
4750 target_flags &= ~MASK_INLINE_SQRT_THR;
4753 if (TARGET_INLINE_SQRT_LAT)
4755 warning ("not yet implemented: latency-optimized inline square root");
4756 target_flags &= ~MASK_INLINE_SQRT_LAT;
4759 if (ia64_fixed_range_string)
4760 fix_range (ia64_fixed_range_string);
4762 if (ia64_tls_size_string)
4765 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4766 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4767 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4769 ia64_tls_size = tmp;
4772 if (!ia64_tune_string)
4773 ia64_tune_string = "itanium2";
4775 for (i = 0; i < pta_size; i++)
4776 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4778 ia64_tune = processor_alias_table[i].processor;
4783 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4785 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4786 flag_schedule_insns_after_reload = 0;
4788 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4790 init_machine_status = ia64_init_machine_status;
4793 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4794 static enum attr_type ia64_safe_type (rtx);
4796 static enum attr_itanium_class
4797 ia64_safe_itanium_class (rtx insn)
4799 if (recog_memoized (insn) >= 0)
4800 return get_attr_itanium_class (insn);
4802 return ITANIUM_CLASS_UNKNOWN;
4805 static enum attr_type
4806 ia64_safe_type (rtx insn)
4808 if (recog_memoized (insn) >= 0)
4809 return get_attr_type (insn);
4811 return TYPE_UNKNOWN;
4814 /* The following collection of routines emit instruction group stop bits as
4815 necessary to avoid dependencies. */
4817 /* Need to track some additional registers as far as serialization is
4818 concerned so we can properly handle br.call and br.ret. We could
4819 make these registers visible to gcc, but since these registers are
4820 never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
4823 #define REG_GP (GR_REG (1))
4824 #define REG_RP (BR_REG (0))
4825 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4826 /* This is used for volatile asms which may require a stop bit immediately
4827 before and after them. */
4828 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4829 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4830 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
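/* (Illustrative note, not from the original comments: the 64 AR_UNAT
   bits are tracked as individual pseudo-registers because a gr spill or
   restore reads or writes only a single bit of ar.unat; see the
   UNSPEC_GR_SPILL/UNSPEC_GR_RESTORE handling in rtx_needs_barrier.)  */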
4832 /* For each register, we keep track of how it has been written in the
4833 current instruction group.
4835 If a register is written unconditionally (no qualifying predicate),
4836 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4838 If a register is written if its qualifying predicate P is true, we
4839 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4840 may be written again by the complement of P (P^1) and when this happens,
4841 WRITE_COUNT gets set to 2.
4843 The result of this is that whenever an insn attempts to write a register
4844 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4846 If a predicate register is written by a floating-point insn, we set
4847 WRITTEN_BY_FP to true.
4849 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4850 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
struct reg_write_state
{
4854 unsigned int write_count : 2;
4855 unsigned int first_pred : 16;
4856 unsigned int written_by_fp : 1;
4857 unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
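/* Worked example (illustrative, not from the original source): for

     (p6) mov r8 = r9
     (p7) mov r8 = r10

   the first write leaves r8 with WRITE_COUNT 1 and FIRST_PRED p6; the
   second, issued under the complementary predicate (FIRST_PRED ^ 1,
   given the even/odd pairing assumed elsewhere in this file), raises
   WRITE_COUNT to 2 without a barrier, since at most one of the two
   writes can execute.  A further write to r8 in the same instruction
   group would then require a stop bit first.  */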
4861 /* Cumulative info for the current instruction group. */
4862 struct reg_write_state rws_sum[NUM_REGS];
4863 /* Info for the current instruction. This gets copied to rws_sum after a
4864 stop bit is emitted. */
4865 struct reg_write_state rws_insn[NUM_REGS];
4867 /* Indicates whether this is the first instruction after a stop bit,
4868 in which case we don't need another stop bit. Without this, we hit
4869 the abort in ia64_variable_issue when scheduling an alloc. */
4870 static int first_instruction;
4872 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4873 RTL for one instruction. */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
4884 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4885 static int rws_access_regno (int, struct reg_flags, int);
4886 static int rws_access_reg (rtx, struct reg_flags, int);
4887 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4888 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4889 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4890 static void init_insn_group_barriers (void);
4891 static int group_barrier_needed_p (rtx);
4892 static int safe_group_barrier_needed_p (rtx);
4894 /* Update *RWS for REGNO, which is being written by the current instruction,
4895 with predicate PRED, and associated register flags in FLAGS. */
static void
rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
4904 rws[regno].written_by_fp |= flags.is_fp;
4905 /* ??? Not tracking and/or across differing predicates. */
4906 rws[regno].written_by_and = flags.is_and;
4907 rws[regno].written_by_or = flags.is_or;
4908 rws[regno].first_pred = pred;
4911 /* Handle an access to register REGNO of type FLAGS using predicate register
4912 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4913 a dependency with an earlier instruction in the same group. */
4916 rws_access_regno (int regno, struct reg_flags flags, int pred)
4918 int need_barrier = 0;
4920 if (regno >= NUM_REGS)
4923 if (! PR_REGNO_P (regno))
4924 flags.is_and = flags.is_or = 0;
4930 /* One insn writes same reg multiple times? */
4931 if (rws_insn[regno].write_count > 0)
4934 /* Update info for current instruction. */
4935 rws_update (rws_insn, regno, flags, pred);
4936 write_count = rws_sum[regno].write_count;
4938 switch (write_count)
4941 /* The register has not been written yet. */
4942 rws_update (rws_sum, regno, flags, pred);
4946 /* The register has been written via a predicate. If this is
4947 not a complementary predicate, then we need a barrier. */
4948 /* ??? This assumes that P and P+1 are always complementary
4949 predicates for P even. */
4950 if (flags.is_and && rws_sum[regno].written_by_and)
4952 else if (flags.is_or && rws_sum[regno].written_by_or)
4954 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4956 rws_update (rws_sum, regno, flags, pred);
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
4962 if (flags.is_and && rws_sum[regno].written_by_and)
4964 else if (flags.is_or && rws_sum[regno].written_by_or)
4968 rws_sum[regno].written_by_and = flags.is_and;
4969 rws_sum[regno].written_by_or = flags.is_or;
4978 if (flags.is_branch)
      /* Branches have several RAW exceptions that allow us to avoid
	 barriers.  */
4983 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4984 /* RAW dependencies on branch regs are permissible as long
4985 as the writer is a non-branch instruction. Since we
4986 never generate code that uses a branch register written
4987 by a branch instruction, handling this case is
4991 if (REGNO_REG_CLASS (regno) == PR_REGS
4992 && ! rws_sum[regno].written_by_fp)
4993 /* The predicates of a branch are available within the
4994 same insn group as long as the predicate was written by
4995 something other than a floating-point instruction. */
4999 if (flags.is_and && rws_sum[regno].written_by_and)
5001 if (flags.is_or && rws_sum[regno].written_by_or)
5004 switch (rws_sum[regno].write_count)
5007 /* The register has not been written yet. */
5011 /* The register has been written via a predicate. If this is
5012 not a complementary predicate, then we need a barrier. */
5013 /* ??? This assumes that P and P+1 are always complementary
5014 predicates for P even. */
5015 if ((rws_sum[regno].first_pred ^ 1) != pred)
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
5030 return need_barrier;
5034 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5036 int regno = REGNO (reg);
5037 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5040 return rws_access_regno (regno, flags, pred);
5043 int need_barrier = 0;
5045 need_barrier |= rws_access_regno (regno + n, flags, pred);
5046 return need_barrier;
5050 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5051 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5054 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
5056 rtx src = SET_SRC (x);
5060 switch (GET_CODE (src))
5066 if (SET_DEST (x) == pc_rtx)
5067 /* X is a conditional branch. */
5071 int is_complemented = 0;
5073 /* X is a conditional move. */
5074 rtx cond = XEXP (src, 0);
5075 if (GET_CODE (cond) == EQ)
5076 is_complemented = 1;
5077 cond = XEXP (cond, 0);
5078 if (GET_CODE (cond) != REG
5079 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5082 if (XEXP (src, 1) == SET_DEST (x)
5083 || XEXP (src, 2) == SET_DEST (x))
5085 /* X is a conditional move that conditionally writes the
5088 /* We need another complement in this case. */
5089 if (XEXP (src, 1) == SET_DEST (x))
5090 is_complemented = ! is_complemented;
5092 *ppred = REGNO (cond);
5093 if (is_complemented)
5097 /* ??? If this is a conditional write to the dest, then this
5098 instruction does not actually read one source. This probably
5099 doesn't matter, because that source is also the dest. */
5100 /* ??? Multiple writes to predicate registers are allowed
5101 if they are all AND type compares, or if they are all OR
	 type compares.  We do not generate such instructions
	 currently.  */
5105 /* ... fall through ... */
5108 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
5109 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5110 /* Set pflags->is_fp to 1 so that we know we're dealing
5111 with a floating point comparison when processing the
5112 destination of the SET. */
5115 /* Discover if this is a parallel comparison. We only handle
5116 and.orcm and or.andcm at present, since we must retain a
5117 strict inverse on the predicate pair. */
5118 else if (GET_CODE (src) == AND)
5120 else if (GET_CODE (src) == IOR)
5127 /* Subroutine of rtx_needs_barrier; this function determines whether the
5128 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   currently in effect.  */

static int
5133 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5135 int need_barrier = 0;
5137 rtx src = SET_SRC (x);
5139 if (GET_CODE (src) == CALL)
5140 /* We don't need to worry about the result registers that
       get written by a subroutine call.  */
5142 return rtx_needs_barrier (src, flags, pred);
5143 else if (SET_DEST (x) == pc_rtx)
5145 /* X is a conditional branch. */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
5148 flags.is_branch = 1;
5149 return rtx_needs_barrier (src, flags, pred);
5152 need_barrier = rtx_needs_barrier (src, flags, pred);
5154 /* This instruction unconditionally uses a predicate register. */
5156 need_barrier |= rws_access_reg (cond, flags, 0);
5159 if (GET_CODE (dst) == ZERO_EXTRACT)
5161 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5162 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5163 dst = XEXP (dst, 0);
5165 return need_barrier;
5168 /* Handle an access to rtx X of type FLAGS using predicate register
5169 PRED. Return 1 if this access creates a dependency with an earlier
5170 instruction in the same group. */
5173 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5176 int is_complemented = 0;
5177 int need_barrier = 0;
5178 const char *format_ptr;
5179 struct reg_flags new_flags;
5187 switch (GET_CODE (x))
5190 update_set_flags (x, &new_flags, &pred, &cond);
5191 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5192 if (GET_CODE (SET_SRC (x)) != CALL)
5194 new_flags.is_write = 1;
5195 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5200 new_flags.is_write = 0;
5201 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5203 /* Avoid multiple register writes, in case this is a pattern with
5204 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5205 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5207 new_flags.is_write = 1;
5208 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5209 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5210 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5215 /* X is a predicated instruction. */
5217 cond = COND_EXEC_TEST (x);
5220 need_barrier = rtx_needs_barrier (cond, flags, 0);
5222 if (GET_CODE (cond) == EQ)
5223 is_complemented = 1;
5224 cond = XEXP (cond, 0);
5225 if (GET_CODE (cond) != REG
5226 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5228 pred = REGNO (cond);
5229 if (is_complemented)
5232 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5233 return need_barrier;
      /* Clobber & use are for earlier compiler phases only.  */
5242 /* We always emit stop bits for traditional asms. We emit stop bits
5243 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5244 if (GET_CODE (x) != ASM_OPERANDS
5245 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5247 /* Avoid writing the register multiple times if we have multiple
5248 asm outputs. This avoids an abort in rws_access_reg. */
5249 if (! rws_insn[REG_VOLATILE].write_count)
5251 new_flags.is_write = 1;
5252 rws_access_regno (REG_VOLATILE, new_flags, pred);
5257 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here, since we would then be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
	 a traditional asm, unlike its normal usage.  */
5262 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5263 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5268 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5270 rtx pat = XVECEXP (x, 0, i);
5271 if (GET_CODE (pat) == SET)
5273 update_set_flags (pat, &new_flags, &pred, &cond);
5274 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5276 else if (GET_CODE (pat) == USE
5277 || GET_CODE (pat) == CALL
5278 || GET_CODE (pat) == ASM_OPERANDS)
5279 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5280 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5283 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5285 rtx pat = XVECEXP (x, 0, i);
5286 if (GET_CODE (pat) == SET)
5288 if (GET_CODE (SET_SRC (pat)) != CALL)
5290 new_flags.is_write = 1;
5291 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5295 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5296 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5304 if (REGNO (x) == AR_UNAT_REGNUM)
5306 for (i = 0; i < 64; ++i)
5307 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5310 need_barrier = rws_access_reg (x, flags, pred);
5314 /* Find the regs used in memory address computation. */
5315 new_flags.is_write = 0;
5316 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5319 case CONST_INT: case CONST_DOUBLE:
5320 case SYMBOL_REF: case LABEL_REF: case CONST:
5323 /* Operators with side-effects. */
5324 case POST_INC: case POST_DEC:
5325 if (GET_CODE (XEXP (x, 0)) != REG)
5328 new_flags.is_write = 0;
5329 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5330 new_flags.is_write = 1;
5331 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5335 if (GET_CODE (XEXP (x, 0)) != REG)
5338 new_flags.is_write = 0;
5339 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5340 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5341 new_flags.is_write = 1;
5342 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5345 /* Handle common unary and binary ops for efficiency. */
5346 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5347 case MOD: case UDIV: case UMOD: case AND: case IOR:
5348 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5349 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5350 case NE: case EQ: case GE: case GT: case LE:
5351 case LT: case GEU: case GTU: case LEU: case LTU:
5352 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5353 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5356 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5357 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5358 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5359 case SQRT: case FFS: case POPCOUNT:
5360 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5364 switch (XINT (x, 1))
5366 case UNSPEC_LTOFF_DTPMOD:
5367 case UNSPEC_LTOFF_DTPREL:
5369 case UNSPEC_LTOFF_TPREL:
5371 case UNSPEC_PRED_REL_MUTEX:
5372 case UNSPEC_PIC_CALL:
5374 case UNSPEC_FETCHADD_ACQ:
5375 case UNSPEC_BSP_VALUE:
5376 case UNSPEC_FLUSHRS:
5377 case UNSPEC_BUNDLE_SELECTOR:
5380 case UNSPEC_GR_SPILL:
5381 case UNSPEC_GR_RESTORE:
5383 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5384 HOST_WIDE_INT bit = (offset >> 3) & 63;
5386 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5387 new_flags.is_write = (XINT (x, 1) == 1);
5388 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5393 case UNSPEC_FR_SPILL:
5394 case UNSPEC_FR_RESTORE:
5395 case UNSPEC_GETF_EXP:
5396 case UNSPEC_SETF_EXP:
5398 case UNSPEC_FR_SQRT_RECIP_APPROX:
5399 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5402 case UNSPEC_FR_RECIP_APPROX:
5403 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5404 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5407 case UNSPEC_CMPXCHG_ACQ:
5408 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5409 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5417 case UNSPEC_VOLATILE:
5418 switch (XINT (x, 1))
5421 /* Alloc must always be the first instruction of a group.
5422 We force this by always returning true. */
5423 /* ??? We might get better scheduling if we explicitly check for
5424 input/local/output register dependencies, and modify the
5425 scheduler so that alloc is always reordered to the start of
5426 the current group. We could then eliminate all of the
5427 first_instruction code. */
5428 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5430 new_flags.is_write = 1;
5431 rws_access_regno (REG_AR_CFM, new_flags, pred);
5434 case UNSPECV_SET_BSP:
5438 case UNSPECV_BLOCKAGE:
5439 case UNSPECV_INSN_GROUP_BARRIER:
5441 case UNSPECV_PSAC_ALL:
5442 case UNSPECV_PSAC_NORMAL:
5451 new_flags.is_write = 0;
5452 need_barrier = rws_access_regno (REG_RP, flags, pred);
5453 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5455 new_flags.is_write = 1;
5456 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5457 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5461 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5462 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5463 switch (format_ptr[i])
5465 case '0': /* unused field */
5466 case 'i': /* integer */
5467 case 'n': /* note */
5468 case 'w': /* wide integer */
5469 case 's': /* pointer to string */
5470 case 'S': /* optional pointer to string */
5474 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5479 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5480 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5489 return need_barrier;
5492 /* Clear out the state for group_barrier_needed_p at the start of a
5493 sequence of insns. */
5496 init_insn_group_barriers (void)
5498 memset (rws_sum, 0, sizeof (rws_sum));
5499 first_instruction = 1;
5502 /* Given the current state, recorded by previous calls to this function,
5503 determine whether a group barrier (a stop bit) is necessary before INSN.
5504 Return nonzero if so. */
5507 group_barrier_needed_p (rtx insn)
5510 int need_barrier = 0;
5511 struct reg_flags flags;
5513 memset (&flags, 0, sizeof (flags));
5514 switch (GET_CODE (insn))
5520 /* A barrier doesn't imply an instruction group boundary. */
5524 memset (rws_insn, 0, sizeof (rws_insn));
5528 flags.is_branch = 1;
5529 flags.is_sibcall = SIBLING_CALL_P (insn);
5530 memset (rws_insn, 0, sizeof (rws_insn));
5532 /* Don't bundle a call following another call. */
5533 if ((pat = prev_active_insn (insn))
5534 && GET_CODE (pat) == CALL_INSN)
5540 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5544 flags.is_branch = 1;
5546 /* Don't bundle a jump following a call. */
5547 if ((pat = prev_active_insn (insn))
5548 && GET_CODE (pat) == CALL_INSN)
5556 if (GET_CODE (PATTERN (insn)) == USE
5557 || GET_CODE (PATTERN (insn)) == CLOBBER)
5558 /* Don't care about USE and CLOBBER "insns"---those are used to
5559 indicate to the optimizer that it shouldn't get rid of
5560 certain operations. */
5563 pat = PATTERN (insn);
5565 /* Ug. Hack hacks hacked elsewhere. */
5566 switch (recog_memoized (insn))
5568 /* We play dependency tricks with the epilogue in order
5569 to get proper schedules. Undo this for dv analysis. */
5570 case CODE_FOR_epilogue_deallocate_stack:
5571 case CODE_FOR_prologue_allocate_stack:
5572 pat = XVECEXP (pat, 0, 0);
5575 /* The pattern we use for br.cloop confuses the code above.
5576 The second element of the vector is representative. */
5577 case CODE_FOR_doloop_end_internal:
5578 pat = XVECEXP (pat, 0, 1);
5581 /* Doesn't generate code. */
5582 case CODE_FOR_pred_rel_mutex:
5583 case CODE_FOR_prologue_use:
5590 memset (rws_insn, 0, sizeof (rws_insn));
5591 need_barrier = rtx_needs_barrier (pat, flags, 0);
  /* Check to see if the previous instruction was a volatile
     asm.  */
5596 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5603 if (first_instruction && INSN_P (insn)
5604 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5605 && GET_CODE (PATTERN (insn)) != USE
5606 && GET_CODE (PATTERN (insn)) != CLOBBER)
5609 first_instruction = 0;
5612 return need_barrier;
5615 /* Like group_barrier_needed_p, but do not clobber the current state. */
5618 safe_group_barrier_needed_p (rtx insn)
5620 struct reg_write_state rws_saved[NUM_REGS];
5621 int saved_first_instruction;
5624 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5625 saved_first_instruction = first_instruction;
5627 t = group_barrier_needed_p (insn);
5629 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5630 first_instruction = saved_first_instruction;
5635 /* Scan the current function and insert stop bits as necessary to
5636 eliminate dependencies. This function assumes that a final
5637 instruction scheduling pass has been run which has already
5638 inserted most of the necessary stop bits. This function only
5639 inserts new ones at basic block boundaries, since these are
5640 invisible to the scheduler. */
5643 emit_insn_group_barriers (FILE *dump)
5647 int insns_since_last_label = 0;
5649 init_insn_group_barriers ();
5651 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5653 if (GET_CODE (insn) == CODE_LABEL)
5655 if (insns_since_last_label)
5657 insns_since_last_label = 0;
5659 else if (GET_CODE (insn) == NOTE
5660 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5662 if (insns_since_last_label)
5664 insns_since_last_label = 0;
5666 else if (GET_CODE (insn) == INSN
5667 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5668 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5670 init_insn_group_barriers ();
5673 else if (INSN_P (insn))
5675 insns_since_last_label = 1;
5677 if (group_barrier_needed_p (insn))
5682 fprintf (dump, "Emitting stop before label %d\n",
5683 INSN_UID (last_label));
5684 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5687 init_insn_group_barriers ();
5695 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5696 This function has to emit all necessary group barriers. */
5699 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5703 init_insn_group_barriers ();
5705 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5707 if (GET_CODE (insn) == BARRIER)
5709 rtx last = prev_active_insn (insn);
5713 if (GET_CODE (last) == JUMP_INSN
5714 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5715 last = prev_active_insn (last);
5716 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5717 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5719 init_insn_group_barriers ();
5721 else if (INSN_P (insn))
5723 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5724 init_insn_group_barriers ();
5725 else if (group_barrier_needed_p (insn))
5727 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5728 init_insn_group_barriers ();
5729 group_barrier_needed_p (insn);
5736 static int errata_find_address_regs (rtx *, void *);
5737 static void errata_emit_nops (rtx);
5738 static void fixup_errata (void);
/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];
5749 /* Index into the last_group array. */
5750 static int group_idx;
5752 /* Called through for_each_rtx; determines if a hard register that was
5753 conditionally set in the previous group is used as an address register.
5754 It ensures that for_each_rtx returns 1 in that case. */
5756 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5759 if (GET_CODE (x) != MEM)
5762 if (GET_CODE (x) == POST_MODIFY)
5764 if (GET_CODE (x) == REG)
5766 struct group *prev_group = last_group + (group_idx ^ 1);
5767 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5775 /* Called for each insn; this function keeps track of the state in
5776 last_group and emits additional NOPs if necessary to work around
5777 an Itanium A/B step erratum. */
5779 errata_emit_nops (rtx insn)
5781 struct group *this_group = last_group + group_idx;
5782 struct group *prev_group = last_group + (group_idx ^ 1);
5783 rtx pat = PATTERN (insn);
5784 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5785 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5786 enum attr_type type;
5789 if (GET_CODE (real_pat) == USE
5790 || GET_CODE (real_pat) == CLOBBER
5791 || GET_CODE (real_pat) == ASM_INPUT
5792 || GET_CODE (real_pat) == ADDR_VEC
5793 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5794 || asm_noperands (PATTERN (insn)) >= 0)
  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     part of it.  */
  set = real_pat;
5800 if (GET_CODE (set) == PARALLEL)
5803 set = XVECEXP (real_pat, 0, 0);
5804 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5805 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5806 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5813 if (set && GET_CODE (set) != SET)
5816 type = get_attr_type (insn);
5819 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5820 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5822 if ((type == TYPE_M || type == TYPE_A) && cond && set
5823 && REG_P (SET_DEST (set))
5824 && GET_CODE (SET_SRC (set)) != PLUS
5825 && GET_CODE (SET_SRC (set)) != MINUS
5826 && (GET_CODE (SET_SRC (set)) != ASHIFT
5827 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5828 && (GET_CODE (SET_SRC (set)) != MEM
5829 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5830 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5832 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5833 || ! REG_P (XEXP (cond, 0)))
5836 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5837 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5839 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5841 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5842 emit_insn_before (gen_nop (), insn);
5843 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5845 memset (last_group, 0, sizeof last_group);
5849 /* Emit extra nops if they are required to work around hardware errata. */
5856 if (! TARGET_B_STEP)
5860 memset (last_group, 0, sizeof last_group);
5862 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5867 if (ia64_safe_type (insn) == TYPE_S)
5870 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5873 errata_emit_nops (insn);
5878 /* Instruction scheduling support. */
5880 #define NR_BUNDLES 10
5882 /* A list of names of all available bundles. */
5884 static const char *bundle_name [NR_BUNDLES] =
5890 #if NR_BUNDLES == 10
5900 /* Nonzero if we should insert stop bits into the schedule. */
5902 int ia64_final_schedule = 0;
/* Codes of the corresponding queried units: */
5906 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5907 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5909 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5910 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5912 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5914 /* The following variable value is an insn group barrier. */
5916 static rtx dfa_stop_insn;
5918 /* The following variable value is the last issued insn. */
5920 static rtx last_scheduled_insn;
5922 /* The following variable value is size of the DFA state. */
5924 static size_t dfa_state_size;
/* The following variable value is a pointer to a DFA state used as a
   temporary variable.  */
5929 static state_t temp_dfa_state = NULL;
/* The following variable value is the DFA state after issuing the last
   insn.  */
5934 static state_t prev_cycle_state = NULL;
5936 /* The following array element values are TRUE if the corresponding
   insn requires stop bits to be added before it.  */
5939 static char *stops_p;
/* The following variable is used to set up the array mentioned above.  */
5943 static int stop_before_p = 0;
/* The following variable value is the length of the arrays `clocks' and
   `add_cycles'.  */
5948 static int clocks_length;
5950 /* The following array element values are cycles on which the
   corresponding insn will be issued.  The array is used only for
   Itanium1.  */

static int *clocks;
/* The following array element values are numbers of cycles that should be
5957 added to improve insn scheduling for MM_insns for Itanium1. */
5959 static int *add_cycles;
5961 static rtx ia64_single_set (rtx);
5962 static void ia64_emit_insn_before (rtx, rtx);
5964 /* Map a bundle number to its pseudo-op. */
5967 get_bundle_name (int b)
5969 return bundle_name[b];
5973 /* Return the maximum number of instructions a cpu can issue. */
5976 ia64_issue_rate (void)
5981 /* Helper function - like single_set, but look inside COND_EXEC. */
5984 ia64_single_set (rtx insn)
5986 rtx x = PATTERN (insn), ret;
5987 if (GET_CODE (x) == COND_EXEC)
5988 x = COND_EXEC_CODE (x);
5989 if (GET_CODE (x) == SET)
5992 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
   Although they are not a classical single set, the second set is there just
5994 to protect it from moving past FP-relative stack accesses. */
5995 switch (recog_memoized (insn))
5997 case CODE_FOR_prologue_allocate_stack:
5998 case CODE_FOR_epilogue_deallocate_stack:
5999 ret = XVECEXP (x, 0, 0);
6003 ret = single_set_2 (insn, x);
6010 /* Adjust the cost of a scheduling dependency. Return the new cost of
6011 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6014 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6016 enum attr_itanium_class dep_class;
6017 enum attr_itanium_class insn_class;
6019 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6022 insn_class = ia64_safe_itanium_class (insn);
6023 dep_class = ia64_safe_itanium_class (dep_insn);
6024 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6025 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6031 /* Like emit_insn_before, but skip cycle_display notes.
6032 ??? When cycle display notes are implemented, update this. */
6035 ia64_emit_insn_before (rtx insn, rtx before)
6037 emit_insn_before (insn, before);
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decreases the latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
6046 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6048 rtx insn, link, next, next_tail;
6050 next_tail = NEXT_INSN (tail);
6051 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6054 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6056 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6058 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6060 next = XEXP (link, 0);
6061 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6062 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6063 && ia64_st_address_bypass_p (insn, next))
6065 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6066 || ia64_safe_itanium_class (next)
6067 == ITANIUM_CLASS_FLD)
6068 && ia64_ld_address_bypass_p (insn, next))
6071 insn->call = link != 0;
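	/* (Hedged note, not from the original comments: the scheduler's
	   `call' field is reused here as a flag meaning "this IALU insn
	   produces an address consumed by a load or store", which the
	   DFA bypass predicates such as `ia64_produce_address_p' can
	   then query.)  */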
6075 /* We're beginning a new block. Initialize data structures as necessary. */
6078 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6079 int sched_verbose ATTRIBUTE_UNUSED,
6080 int max_ready ATTRIBUTE_UNUSED)
6082 #ifdef ENABLE_CHECKING
6085 if (reload_completed)
6086 for (insn = NEXT_INSN (current_sched_info->prev_head);
6087 insn != current_sched_info->next_tail;
6088 insn = NEXT_INSN (insn))
6089 if (SCHED_GROUP_P (insn))
6092 last_scheduled_insn = NULL_RTX;
6093 init_insn_group_barriers ();
/* We are about to begin issuing insns for this clock cycle.
6097 Override the default sort algorithm to better slot instructions. */
6100 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;
6110 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6112 if (reorder_type == 0)
6114 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6116 for (insnp = ready; insnp < e_ready; insnp++)
6117 if (insnp < e_ready)
6120 enum attr_type t = ia64_safe_type (insn);
6121 if (t == TYPE_UNKNOWN)
6123 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6124 || asm_noperands (PATTERN (insn)) >= 0)
6126 rtx lowest = ready[n_asms];
6127 ready[n_asms] = insn;
6133 rtx highest = ready[n_ready - 1];
6134 ready[n_ready - 1] = insn;
6141 if (n_asms < n_ready)
6143 /* Some normal insns to process. Skip the asms. */
6147 else if (n_ready > 0)
6151 if (ia64_final_schedule)
6154 int nr_need_stop = 0;
6156 for (insnp = ready; insnp < e_ready; insnp++)
6157 if (safe_group_barrier_needed_p (*insnp))
6160 if (reorder_type == 1 && n_ready == nr_need_stop)
6162 if (reorder_type == 0)
	  /* Move down everything that needs a stop bit, preserving
	     relative order.  */
6167 while (insnp-- > ready + deleted)
6168 while (insnp >= ready + deleted)
6171 if (! safe_group_barrier_needed_p (insn))
6173 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
/* We are about to begin issuing insns for this clock cycle.  Override
6185 the default sort algorithm to better slot instructions. */
6188 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6191 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6192 pn_ready, clock_var, 0);
6195 /* Like ia64_sched_reorder, but called after issuing each insn.
6196 Override the default sort algorithm to better slot instructions. */
6199 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6200 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6201 int *pn_ready, int clock_var)
6203 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6204 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6205 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6209 /* We are about to issue INSN. Return the number of insns left on the
6210 ready queue that can be issued this cycle. */
6213 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6214 int sched_verbose ATTRIBUTE_UNUSED,
6215 rtx insn ATTRIBUTE_UNUSED,
6216 int can_issue_more ATTRIBUTE_UNUSED)
6218 last_scheduled_insn = insn;
6219 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6220 if (reload_completed)
6222 if (group_barrier_needed_p (insn))
6224 if (GET_CODE (insn) == CALL_INSN)
6225 init_insn_group_barriers ();
6226 stops_p [INSN_UID (insn)] = stop_before_p;
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static int
6236 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6238 if (insn == NULL_RTX || !INSN_P (insn))
6240 return (!reload_completed
6241 || !safe_group_barrier_needed_p (insn));
/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */
6248 static rtx dfa_pre_cycle_insn;
/* We are about to begin issuing INSN.  Return nonzero if we can not
   issue it on the given cycle CLOCK, and zero if we should not sort
   the ready queue on the next clock start.  */
6255 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6256 int clock, int *sort_p)
6258 int setup_clocks_p = FALSE;
6260 if (insn == NULL_RTX || !INSN_P (insn))
6262 if ((reload_completed && safe_group_barrier_needed_p (insn))
6263 || (last_scheduled_insn
6264 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6265 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6266 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6268 init_insn_group_barriers ();
6269 if (verbose && dump)
6270 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6271 last_clock == clock ? " + cycle advance" : "");
6273 if (last_clock == clock)
6275 state_transition (curr_state, dfa_stop_insn);
6276 if (TARGET_EARLY_STOP_BITS)
6277 *sort_p = (last_scheduled_insn == NULL_RTX
6278 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6283 else if (reload_completed)
6284 setup_clocks_p = TRUE;
6285 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6286 state_transition (curr_state, dfa_stop_insn);
6287 state_transition (curr_state, dfa_pre_cycle_insn);
6288 state_transition (curr_state, NULL);
6290 else if (reload_completed)
6291 setup_clocks_p = TRUE;
6292 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6294 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6296 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6301 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6302 if (REG_NOTE_KIND (link) == 0)
6304 enum attr_itanium_class dep_class;
6305 rtx dep_insn = XEXP (link, 0);
6307 dep_class = ia64_safe_itanium_class (dep_insn);
6308 if ((dep_class == ITANIUM_CLASS_MMMUL
6309 || dep_class == ITANIUM_CLASS_MMSHF)
6310 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6312 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6313 d = last_clock - clocks [INSN_UID (dep_insn)];
6316 add_cycles [INSN_UID (insn)] = 3 - d;
6324 /* The following page contains abstract data `bundle states' which are
6325 used for bundling insns (inserting nops and template generation). */
/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
6334 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
  /* number of nops before and after the insn */
6336 short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
		   insn) */
6339 int cost; /* cost of the state in cycles */
6340 int accumulated_insns_num; /* number of all previous insns including
				 nops; an L insn counts as 2 insns */
6342 int branch_deviation; /* deviation of previous branches from 3rd slots */
6343 struct bundle_state *next; /* next state with the same insn_num */
6344 struct bundle_state *originator; /* originator (previous insn state) */
6345 /* All bundle states are in the following chain. */
6346 struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};
/* The following maps an insn number to the corresponding bundle state.  */
6353 static struct bundle_state **index_to_bundle_states;
/* The unique number of the next bundle state.  */
6357 static int bundle_states_num;
6359 /* All allocated bundle states are in the following chain. */
6361 static struct bundle_state *allocated_bundle_states_chain;
/* All allocated but not used bundle states are in the following
   chain.  */
6366 static struct bundle_state *free_bundle_state_chain;
6369 /* The following function returns a free bundle state. */
6371 static struct bundle_state *
6372 get_free_bundle_state (void)
6374 struct bundle_state *result;
6376 if (free_bundle_state_chain != NULL)
6378 result = free_bundle_state_chain;
6379 free_bundle_state_chain = result->next;
6383 result = xmalloc (sizeof (struct bundle_state));
6384 result->dfa_state = xmalloc (dfa_state_size);
6385 result->allocated_states_chain = allocated_bundle_states_chain;
6386 allocated_bundle_states_chain = result;
6388 result->unique_num = bundle_states_num++;
/* The following function frees the given bundle state.  */
6396 free_bundle_state (struct bundle_state *state)
6398 state->next = free_bundle_state_chain;
6399 free_bundle_state_chain = state;
6402 /* Start work with abstract data `bundle states'. */
6405 initiate_bundle_states (void)
6407 bundle_states_num = 0;
6408 free_bundle_state_chain = NULL;
6409 allocated_bundle_states_chain = NULL;
6412 /* Finish work with abstract data `bundle states'. */
6415 finish_bundle_states (void)
6417 struct bundle_state *curr_state, *next_state;
6419 for (curr_state = allocated_bundle_states_chain;
6421 curr_state = next_state)
6423 next_state = curr_state->allocated_states_chain;
6424 free (curr_state->dfa_state);
6429 /* Hash table of the bundle states. The key is dfa_state and insn_num
6430 of the bundle states. */
6432 static htab_t bundle_state_table;
/* The function returns the hash of BUNDLE_STATE.  */
6437 bundle_state_hash (const void *bundle_state)
  const struct bundle_state *state = (struct bundle_state *) bundle_state;
  unsigned result, i;
6442 for (result = i = 0; i < dfa_state_size; i++)
6443 result += (((unsigned char *) state->dfa_state) [i]
6444 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6445 return result + state->insn_num;
6448 /* The function returns nonzero if the bundle state keys are equal. */
6451 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6453 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6454 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6456 return (state1->insn_num == state2->insn_num
6457 && memcmp (state1->dfa_state, state2->dfa_state,
6458 dfa_state_size) == 0);
/* The function inserts BUNDLE_STATE into the hash table and returns
   nonzero if it has been inserted.  The table keeps only the best
   bundle state for a given key.  */
6466 insert_bundle_state (struct bundle_state *bundle_state)
6470 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6471 if (*entry_ptr == NULL)
6473 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6474 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6475 *entry_ptr = (void *) bundle_state;
6478 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6479 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6480 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6481 > bundle_state->accumulated_insns_num
6482 || (((struct bundle_state *)
6483 *entry_ptr)->accumulated_insns_num
6484 == bundle_state->accumulated_insns_num
6485 && ((struct bundle_state *)
6486 *entry_ptr)->branch_deviation
6487 > bundle_state->branch_deviation))))
6490 struct bundle_state temp;
6492 temp = *(struct bundle_state *) *entry_ptr;
6493 *(struct bundle_state *) *entry_ptr = *bundle_state;
6494 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6495 *bundle_state = temp;
6500 /* Start work with the hash table. */
6503 initiate_bundle_state_table (void)
6505 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6509 /* Finish work with the hash table. */
6512 finish_bundle_state_table (void)
6514 htab_delete (bundle_state_table);
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
6522 static rtx ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If this fails, the
   function returns FALSE and frees the current state.  */
6529 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6533 for (i = 0; i < nops_num; i++)
6534 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6536 free_bundle_state (curr_state);
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If this fails, the
   function returns FALSE and frees the current state.  */
6547 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6549 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6551 free_bundle_state (curr_state);
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If this is successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */
6565 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6566 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6568 struct bundle_state *curr_state;
6570 curr_state = get_free_bundle_state ();
6571 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6572 curr_state->insn = insn;
6573 curr_state->insn_num = originator->insn_num + 1;
6574 curr_state->cost = originator->cost;
6575 curr_state->originator = originator;
6576 curr_state->before_nops_num = before_nops_num;
6577 curr_state->after_nops_num = 0;
6578 curr_state->accumulated_insns_num
6579 = originator->accumulated_insns_num + before_nops_num;
6580 curr_state->branch_deviation = originator->branch_deviation;
6581 if (insn == NULL_RTX)
6583 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6585 if (GET_MODE (insn) == TImode)
6587 if (!try_issue_nops (curr_state, before_nops_num))
6589 if (!try_issue_insn (curr_state, insn))
6591 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6592 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6593 && curr_state->accumulated_insns_num % 3 != 0)
6595 free_bundle_state (curr_state);
6599 else if (GET_MODE (insn) != TImode)
6601 if (!try_issue_nops (curr_state, before_nops_num))
6603 if (!try_issue_insn (curr_state, insn))
6605 curr_state->accumulated_insns_num++;
6606 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6607 || asm_noperands (PATTERN (insn)) >= 0)
6609 if (ia64_safe_type (insn) == TYPE_L)
6610 curr_state->accumulated_insns_num++;
6614 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6615 state_transition (curr_state->dfa_state, NULL);
6617 if (!try_issue_nops (curr_state, before_nops_num))
6619 if (!try_issue_insn (curr_state, insn))
6621 curr_state->accumulated_insns_num++;
6622 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6623 || asm_noperands (PATTERN (insn)) >= 0)
6625 /* Finish bundle containing asm insn. */
6626 curr_state->after_nops_num
6627 = 3 - curr_state->accumulated_insns_num % 3;
6628 curr_state->accumulated_insns_num
6629 += 3 - curr_state->accumulated_insns_num % 3;
6631 else if (ia64_safe_type (insn) == TYPE_L)
6632 curr_state->accumulated_insns_num++;
6634 if (ia64_safe_type (insn) == TYPE_B)
6635 curr_state->branch_deviation
6636 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6637 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6639 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6642 struct bundle_state *curr_state1;
6643 struct bundle_state *allocated_states_chain;
6645 curr_state1 = get_free_bundle_state ();
6646 dfa_state = curr_state1->dfa_state;
6647 allocated_states_chain = curr_state1->allocated_states_chain;
6648 *curr_state1 = *curr_state;
6649 curr_state1->dfa_state = dfa_state;
6650 curr_state1->allocated_states_chain = allocated_states_chain;
6651 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6653 curr_state = curr_state1;
6655 if (!try_issue_nops (curr_state,
6656 3 - curr_state->accumulated_insns_num % 3))
6658 curr_state->after_nops_num
6659 = 3 - curr_state->accumulated_insns_num % 3;
6660 curr_state->accumulated_insns_num
6661 += 3 - curr_state->accumulated_insns_num % 3;
6663 if (!insert_bundle_state (curr_state))
6664 free_bundle_state (curr_state);
/* The following function returns the position in the two-bundle window
   for the given STATE.  */
6672 get_max_pos (state_t state)
6674 if (cpu_unit_reservation_p (state, pos_6))
6676 else if (cpu_unit_reservation_p (state, pos_5))
6678 else if (cpu_unit_reservation_p (state, pos_4))
6680 else if (cpu_unit_reservation_p (state, pos_3))
6682 else if (cpu_unit_reservation_p (state, pos_2))
6684 else if (cpu_unit_reservation_p (state, pos_1))
/* The function returns the code of a possible template for the given
   position and state.  It should be called only with position values
   of 3 or 6.  */
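/* For reference (a summary inferred from the uses further below, e.g.
   the "-> MFI" remark for template 2 and the template0 == 9 check for
   an MLX bundle; the .md file is authoritative), the template codes
   follow the order of the bundle selector:

     0 = MII   1 = MMI   2 = MFI   3 = MMF   4 = BBB
     5 = MBB   6 = MIB   7 = MMB   8 = MFB   9 = MLX  */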
6695 get_template (state_t state, int pos)
6700 if (cpu_unit_reservation_p (state, _0mii_))
6702 else if (cpu_unit_reservation_p (state, _0mmi_))
6704 else if (cpu_unit_reservation_p (state, _0mfi_))
6706 else if (cpu_unit_reservation_p (state, _0mmf_))
6708 else if (cpu_unit_reservation_p (state, _0bbb_))
6710 else if (cpu_unit_reservation_p (state, _0mbb_))
6712 else if (cpu_unit_reservation_p (state, _0mib_))
6714 else if (cpu_unit_reservation_p (state, _0mmb_))
6716 else if (cpu_unit_reservation_p (state, _0mfb_))
6718 else if (cpu_unit_reservation_p (state, _0mlx_))
6723 if (cpu_unit_reservation_p (state, _1mii_))
6725 else if (cpu_unit_reservation_p (state, _1mmi_))
6727 else if (cpu_unit_reservation_p (state, _1mfi_))
6729 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6731 else if (cpu_unit_reservation_p (state, _1bbb_))
6733 else if (cpu_unit_reservation_p (state, _1mbb_))
6735 else if (cpu_unit_reservation_p (state, _1mib_))
6737 else if (cpu_unit_reservation_p (state, _1mmb_))
6739 else if (cpu_unit_reservation_p (state, _1mfb_))
6741 else if (cpu_unit_reservation_p (state, _1mlx_))
/* The following function returns the first insn important for insn
   bundling, starting at INSN and ending before TAIL.  */
6754 get_next_important_insn (rtx insn, rtx tail)
6756 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6758 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6759 && GET_CODE (PATTERN (insn)) != USE
6760 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).
   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by the structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insn
   and the nops for it), we reuse the already generated one.  As a
   consequence we reject decisions which cannot improve the solution
   and reduce the memory used by the algorithm.
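   As an illustrative sketch (schematic pseudo-code, not the actual
   implementation), the forward pass step for an insn I is roughly:

     for each state S with insn_num == number of I - 1
       for nops_num in { 0, 1, 2 }
         S' = copy of S;
         if S'->dfa_state accepts nops_num nops followed by I
           insert S' into the hash table keyed by (dfa_state, insn_num),
           keeping the better state by cost, then by accumulated insns,
           then by branch deviation;

   see issue_nops_and_insn and insert_bundle_state above.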
   When we reach the end of an EBB (extended basic block), we choose
   the best insn sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are obtained by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  For the Itanium1 processor there is an additional forward pass
   through the EBB; it inserts more nops to make the dependency between
   a producer insn and MMMUL/MMSHF insns at least 4 cycles long.  */
6805 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6807 struct bundle_state *curr_state, *next_state, *best_state;
6808 rtx insn, next_insn;
6810 int i, bundle_end_p, only_bundle_end_p, asm_p;
6811 int pos = 0, max_pos, template0, template1;
6814 enum attr_type type;
6817 /* Count insns in the EBB. */
6818 for (insn = NEXT_INSN (prev_head_insn);
6819 insn && insn != tail;
6820 insn = NEXT_INSN (insn))
6826 dfa_clean_insn_cache ();
6827 initiate_bundle_state_table ();
6828 index_to_bundle_states = xmalloc ((insn_num + 2)
6829 * sizeof (struct bundle_state *));
6830 /* First (forward) pass -- generation of bundle states. */
6831 curr_state = get_free_bundle_state ();
6832 curr_state->insn = NULL;
6833 curr_state->before_nops_num = 0;
6834 curr_state->after_nops_num = 0;
6835 curr_state->insn_num = 0;
6836 curr_state->cost = 0;
6837 curr_state->accumulated_insns_num = 0;
6838 curr_state->branch_deviation = 0;
6839 curr_state->next = NULL;
6840 curr_state->originator = NULL;
6841 state_reset (curr_state->dfa_state);
6842 index_to_bundle_states [0] = curr_state;
/* Shift the cycle mark if it is placed on an insn which could be ignored.  */
6845 for (insn = NEXT_INSN (prev_head_insn);
6847 insn = NEXT_INSN (insn))
6849 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6850 || GET_CODE (PATTERN (insn)) == USE
6851 || GET_CODE (PATTERN (insn)) == CLOBBER)
6852 && GET_MODE (insn) == TImode)
6854 PUT_MODE (insn, VOIDmode);
6855 for (next_insn = NEXT_INSN (insn);
6857 next_insn = NEXT_INSN (next_insn))
6858 if (INSN_P (next_insn)
6859 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6860 && GET_CODE (PATTERN (next_insn)) != USE
6861 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6863 PUT_MODE (next_insn, TImode);
/* Forward pass: generation of bundle states.  */
6868 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6873 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6874 || GET_CODE (PATTERN (insn)) == USE
6875 || GET_CODE (PATTERN (insn)) == CLOBBER)
6877 type = ia64_safe_type (insn);
6878 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6880 index_to_bundle_states [insn_num] = NULL;
6881 for (curr_state = index_to_bundle_states [insn_num - 1];
6883 curr_state = next_state)
6885 pos = curr_state->accumulated_insns_num % 3;
6886 next_state = curr_state->next;
/* We must fill up the current bundle in order to start a
   subsequent asm insn in a new bundle.  An asm insn is always
   placed in a separate bundle.  */
6891 = (next_insn != NULL_RTX
6892 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6893 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6894 /* We may fill up the current bundle if it is the cycle end
6895 without a group barrier. */
6897 = (only_bundle_end_p || next_insn == NULL_RTX
6898 || (GET_MODE (next_insn) == TImode
6899 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6900 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6902 /* We need to insert 2 nops for cases like M_MII. To
6903 guarantee issuing all insns on the same cycle for
6904 Itanium 1, we need to issue 2 nops after the first M
6905 insn (MnnMII where n is a nop insn). */
6906 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6907 && !bundle_end_p && pos == 1))
6908 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6910 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6912 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6915 if (index_to_bundle_states [insn_num] == NULL)
6917 for (curr_state = index_to_bundle_states [insn_num];
6919 curr_state = curr_state->next)
6920 if (verbose >= 2 && dump)
/* This structure is taken from the generated code of the
   pipeline hazard recognizer (see file insn-attrtab.c).
   Please don't forget to change the structure if a new
   automaton is added to the .md file.  */
6928 unsigned short one_automaton_state;
6929 unsigned short oneb_automaton_state;
6930 unsigned short two_automaton_state;
6931 unsigned short twob_automaton_state;
6936 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6937 curr_state->unique_num,
6938 (curr_state->originator == NULL
6939 ? -1 : curr_state->originator->unique_num),
6941 curr_state->before_nops_num, curr_state->after_nops_num,
6942 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6943 (ia64_tune == PROCESSOR_ITANIUM
6944 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6945 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6949 if (index_to_bundle_states [insn_num] == NULL)
/* We should find a solution because the 2nd insn scheduling has found one.  */
6953 /* Find a state corresponding to the best insn sequence. */
6955 for (curr_state = index_to_bundle_states [insn_num];
6957 curr_state = curr_state->next)
/* We are only looking at states with a fully filled-up last
   bundle.  First we prefer insn sequences with minimal cost,
   then with a minimal number of inserted nops, and finally with
   branch insns placed in 3rd slots.  */
6962 if (curr_state->accumulated_insns_num % 3 == 0
6963 && (best_state == NULL || best_state->cost > curr_state->cost
6964 || (best_state->cost == curr_state->cost
6965 && (curr_state->accumulated_insns_num
6966 < best_state->accumulated_insns_num
6967 || (curr_state->accumulated_insns_num
6968 == best_state->accumulated_insns_num
6969 && curr_state->branch_deviation
6970 < best_state->branch_deviation)))))
6971 best_state = curr_state;
6972 /* Second (backward) pass: adding nops and templates. */
6973 insn_num = best_state->before_nops_num;
6974 template0 = template1 = -1;
6975 for (curr_state = best_state;
6976 curr_state->originator != NULL;
6977 curr_state = curr_state->originator)
6979 insn = curr_state->insn;
6980 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6981 || asm_noperands (PATTERN (insn)) >= 0);
6983 if (verbose >= 2 && dump)
6987 unsigned short one_automaton_state;
6988 unsigned short oneb_automaton_state;
6989 unsigned short two_automaton_state;
6990 unsigned short twob_automaton_state;
6995 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6996 curr_state->unique_num,
6997 (curr_state->originator == NULL
6998 ? -1 : curr_state->originator->unique_num),
7000 curr_state->before_nops_num, curr_state->after_nops_num,
7001 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7002 (ia64_tune == PROCESSOR_ITANIUM
7003 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7004 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
/* Find the position in the current bundle window.  The window can
   contain at most two bundles.  A two-bundle window means that
   the processor will make two bundle rotations.  */
7010 max_pos = get_max_pos (curr_state->dfa_state);
/* The following test (a negative template number) means that the
   processor did one bundle rotation.  */
7014 || (max_pos == 3 && template0 < 0))
/* We are at the end of the window -- find template(s) for its bundle(s).  */
7020 template0 = get_template (curr_state->dfa_state, 3);
7023 template1 = get_template (curr_state->dfa_state, 3);
7024 template0 = get_template (curr_state->dfa_state, 6);
7027 if (max_pos > 3 && template1 < 0)
/* This may happen when we have a stop inside a bundle.  */
7032 template1 = get_template (curr_state->dfa_state, 3);
7036 /* Emit nops after the current insn. */
7037 for (i = 0; i < curr_state->after_nops_num; i++)
7040 emit_insn_after (nop, insn);
7046 /* We are at the start of a bundle: emit the template
7047 (it should be defined). */
7050 b = gen_bundle_selector (GEN_INT (template0));
7051 ia64_emit_insn_before (b, nop);
/* If we have a two-bundle window, we make one bundle
   rotation.  Otherwise template0 will be undefined
   (a negative value).  */
7055 template0 = template1;
/* Move the position backward in the window.  A group barrier
   occupies no slot.  An asm insn takes a whole bundle.  */
7061 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7062 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7063 && asm_noperands (PATTERN (insn)) < 0)
7065 /* Long insn takes 2 slots. */
7066 if (ia64_safe_type (insn) == TYPE_L)
7071 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7072 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7073 && asm_noperands (PATTERN (insn)) < 0)
/* The current insn is at the bundle start: emit the template.  */
7079 b = gen_bundle_selector (GEN_INT (template0));
7080 ia64_emit_insn_before (b, insn);
7081 b = PREV_INSN (insn);
/* See comment above in the analogous place for emitting nops after the insn.  */
7085 template0 = template1;
7088 /* Emit nops after the current insn. */
7089 for (i = 0; i < curr_state->before_nops_num; i++)
7092 ia64_emit_insn_before (nop, insn);
7093 nop = PREV_INSN (insn);
/* See comment above in the analogous place for emitting nops after the insn.  */
7104 b = gen_bundle_selector (GEN_INT (template0));
7105 ia64_emit_insn_before (b, insn);
7106 b = PREV_INSN (insn);
7108 template0 = template1;
7113 if (ia64_tune == PROCESSOR_ITANIUM)
/* Insert additional cycles for MM-insns (MMMUL and MMSHF).
   Itanium1 has a quirk: if the distance between an insn
   and a dependent MM-insn is less than 4 cycles, we get an
   additional 6-cycle stall.  So we make the distance equal to
   4 cycles if it is shorter.  */
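/* Schematically (an assumed illustration, not actual compiler output):
   for an MM-insn M with add_cycles [INSN_UID (M)] == 2, the code below
   completes the bundle that used to hold M with nops and a stop bit,
   emits two

     MII;

   bundles consisting of nops (each ended by a stop bit, costing one
   cycle apiece), and finally restarts M's original bundle template
   with enough leading nops that M keeps its original slot.  */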
7119 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7124 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7125 || GET_CODE (PATTERN (insn)) == USE
7126 || GET_CODE (PATTERN (insn)) == CLOBBER)
7128 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7129 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
/* We found an MM-insn which needs additional cycles.  */
/* Now we are looking for the template of the bundle in
   which the MM-insn is placed and for the position of the
   insn in the bundle (0, 1, or 2).  We also check whether
   there is a stop before the insn.  */
7140 last = prev_active_insn (insn);
7141 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7143 last = prev_active_insn (last);
7145 for (;; last = prev_active_insn (last))
7146 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7148 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
/* The insn is in an MLX bundle.  Change the template
   to MFI because we will add nops before the
   insn; this simplifies the subsequent code a lot.  */
7154 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7157 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
/* Sanity check: the stop is not at the bundle start,
   there are no more than 3 insns in the bundle, and the
   MM-insn is not at the start of a bundle with template MLX.  */
7163 if ((pred_stop_p && n == 0) || n > 2
7164 || (template0 == 9 && n != 0))
7166 /* Put nops after the insn in the bundle. */
7167 for (j = 3 - n; j > 0; j --)
7168 ia64_emit_insn_before (gen_nop (), insn);
/* This takes into account that we will add N more nops
   before the insn later -- see the code below.  */
7171 add_cycles [INSN_UID (insn)]--;
7172 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7173 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7176 add_cycles [INSN_UID (insn)]--;
7177 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7179 /* Insert "MII;" template. */
7180 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7182 ia64_emit_insn_before (gen_nop (), insn);
7183 ia64_emit_insn_before (gen_nop (), insn);
/* To decrease code size, we use an "MI;I;" template here instead.  */
7188 ia64_emit_insn_before
7189 (gen_insn_group_barrier (GEN_INT (3)), insn);
7192 ia64_emit_insn_before (gen_nop (), insn);
7193 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7196 /* Put the MM-insn in the same slot of a bundle with the
7197 same template as the original one. */
7198 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
/* To put the insn in the same slot, add the necessary number of nops.  */
7202 for (j = n; j > 0; j --)
7203 ia64_emit_insn_before (gen_nop (), insn);
7204 /* Put the stop if the original bundle had it. */
7206 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7210 free (index_to_bundle_states);
7211 finish_bundle_state_table ();
7213 dfa_clean_insn_cache ();
7216 /* The following function is called at the end of scheduling BB or
7217 EBB. After reload, it inserts stop bits and does insn bundling. */
7220 ia64_sched_finish (FILE *dump, int sched_verbose)
7223 fprintf (dump, "// Finishing schedule.\n");
7224 if (!reload_completed)
7226 if (reload_completed)
7228 final_emit_insn_group_barriers (dump);
7229 bundling (dump, sched_verbose, current_sched_info->prev_head,
7230 current_sched_info->next_tail);
7231 if (sched_verbose && dump)
7232 fprintf (dump, "// finishing %d-%d\n",
7233 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7234 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7240 /* The following function inserts stop bits in scheduled BB or EBB. */
7243 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7246 int need_barrier_p = 0;
7247 rtx prev_insn = NULL_RTX;
7249 init_insn_group_barriers ();
7251 for (insn = NEXT_INSN (current_sched_info->prev_head);
7252 insn != current_sched_info->next_tail;
7253 insn = NEXT_INSN (insn))
7255 if (GET_CODE (insn) == BARRIER)
7257 rtx last = prev_active_insn (insn);
7261 if (GET_CODE (last) == JUMP_INSN
7262 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7263 last = prev_active_insn (last);
7264 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7265 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7267 init_insn_group_barriers ();
7269 prev_insn = NULL_RTX;
7271 else if (INSN_P (insn))
7273 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7275 init_insn_group_barriers ();
7277 prev_insn = NULL_RTX;
7279 else if (need_barrier_p || group_barrier_needed_p (insn))
7281 if (TARGET_EARLY_STOP_BITS)
7286 last != current_sched_info->prev_head;
7287 last = PREV_INSN (last))
7288 if (INSN_P (last) && GET_MODE (last) == TImode
7289 && stops_p [INSN_UID (last)])
7291 if (last == current_sched_info->prev_head)
7293 last = prev_active_insn (last);
7295 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7296 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7298 init_insn_group_barriers ();
7299 for (last = NEXT_INSN (last);
7301 last = NEXT_INSN (last))
7303 group_barrier_needed_p (last);
7307 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7309 init_insn_group_barriers ();
7311 group_barrier_needed_p (insn);
7312 prev_insn = NULL_RTX;
7314 else if (recog_memoized (insn) >= 0)
7316 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7317 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7318 || asm_noperands (PATTERN (insn)) >= 0);
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */
7329 ia64_use_dfa_pipeline_interface (void)
/* The following function returns the lookahead depth used by the
   first cycle multipass insn scheduling: 6 insns after reload and
   4 before.  */
7338 ia64_first_cycle_multipass_dfa_lookahead (void)
7340 return (reload_completed ? 6 : 4);
/* The following function initializes the variable `dfa_pre_cycle_insn'
   and related DFA scheduling data.  */
7346 ia64_init_dfa_pre_cycle_insn (void)
7348 if (temp_dfa_state == NULL)
7350 dfa_state_size = state_size ();
7351 temp_dfa_state = xmalloc (dfa_state_size);
7352 prev_cycle_state = xmalloc (dfa_state_size);
7354 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7355 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7356 recog_memoized (dfa_pre_cycle_insn);
7357 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7358 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7359 recog_memoized (dfa_stop_insn);
7362 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7363 used by the DFA insn scheduler. */
7366 ia64_dfa_pre_cycle_insn (void)
7368 return dfa_pre_cycle_insn;
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */
7375 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7379 if (producer == NULL_RTX || consumer == NULL_RTX)
7381 dest = ia64_single_set (producer);
7382 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7383 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7385 if (GET_CODE (reg) == SUBREG)
7386 reg = SUBREG_REG (reg);
7387 dest = ia64_single_set (consumer);
7388 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7389 || GET_CODE (mem) != MEM)
7391 return reg_mentioned_p (reg, mem);
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */
7398 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7400 rtx dest, src, reg, mem;
7402 if (producer == NULL_RTX || consumer == NULL_RTX)
7404 dest = ia64_single_set (producer);
7405 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7406 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7408 if (GET_CODE (reg) == SUBREG)
7409 reg = SUBREG_REG (reg);
7410 src = ia64_single_set (consumer);
7411 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7413 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7414 mem = XVECEXP (mem, 0, 0);
7415 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7416 mem = XEXP (mem, 0);
7418 /* Note that LO_SUM is used for GOT loads. */
7419 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7422 return reg_mentioned_p (reg, mem);
/* The following function returns TRUE if INSN produces an address for
   a load/store insn.  We will place such insns into an M slot because
   that decreases the latency of the dependent load/store.  */
7430 ia64_produce_address_p (rtx insn)
7436 /* Emit pseudo-ops for the assembler to describe predicate relations.
7437 At present this assumes that we only consider predicate pairs to
7438 be mutex, and that the assembler can deduce proper values from
7439 straight-line code. */
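/* For example (the directive spelling here is assumed; the
   pred_rel_mutex pattern in the .md file is authoritative): for a
   predicate pair p6/p7 live at a code label, this emits something like

     .pred.rel "mutex", p6, p7

   so the assembler knows the two predicates cannot be true at the
   same time.  */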
7442 emit_predicate_relation_info (void)
7446 FOR_EACH_BB_REVERSE (bb)
7449 rtx head = BB_HEAD (bb);
7451 /* We only need such notes at code labels. */
7452 if (GET_CODE (head) != CODE_LABEL)
7454 if (GET_CODE (NEXT_INSN (head)) == NOTE
7455 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7456 head = NEXT_INSN (head);
7458 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7459 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7461 rtx p = gen_rtx_REG (BImode, r);
7462 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7463 if (head == BB_END (bb))
7469 /* Look for conditional calls that do not return, and protect predicate
7470 relations around them. Otherwise the assembler will assume the call
returns, and complain about uses of call-clobbered predicates after
the call.  */
7473 FOR_EACH_BB_REVERSE (bb)
7475 rtx insn = BB_HEAD (bb);
7479 if (GET_CODE (insn) == CALL_INSN
7480 && GET_CODE (PATTERN (insn)) == COND_EXEC
7481 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7483 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7484 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7485 if (BB_HEAD (bb) == insn)
7487 if (BB_END (bb) == insn)
7491 if (insn == BB_END (bb))
7493 insn = NEXT_INSN (insn);
7498 /* Perform machine dependent operations on the rtl chain INSNS. */
7503 /* We are freeing block_for_insn in the toplev to keep compatibility
7504 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7505 compute_bb_for_insn ();
7507 /* If optimizing, we'll have split before scheduling. */
7509 split_all_insns (0);
7511 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7512 non-optimizing bootstrap. */
7513 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7515 if (ia64_flag_schedule_insns2)
7517 timevar_push (TV_SCHED2);
7518 ia64_final_schedule = 1;
7520 initiate_bundle_states ();
7521 ia64_nop = make_insn_raw (gen_nop ());
7522 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7523 recog_memoized (ia64_nop);
7524 clocks_length = get_max_uid () + 1;
7525 stops_p = xcalloc (1, clocks_length);
7526 if (ia64_tune == PROCESSOR_ITANIUM)
7528 clocks = xcalloc (clocks_length, sizeof (int));
7529 add_cycles = xcalloc (clocks_length, sizeof (int));
7531 if (ia64_tune == PROCESSOR_ITANIUM2)
7533 pos_1 = get_cpu_unit_code ("2_1");
7534 pos_2 = get_cpu_unit_code ("2_2");
7535 pos_3 = get_cpu_unit_code ("2_3");
7536 pos_4 = get_cpu_unit_code ("2_4");
7537 pos_5 = get_cpu_unit_code ("2_5");
7538 pos_6 = get_cpu_unit_code ("2_6");
7539 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7540 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7541 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7542 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7543 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7544 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7545 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7546 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7547 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7548 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7549 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7550 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7551 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7552 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7553 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7554 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7555 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7556 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7557 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7558 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7562 pos_1 = get_cpu_unit_code ("1_1");
7563 pos_2 = get_cpu_unit_code ("1_2");
7564 pos_3 = get_cpu_unit_code ("1_3");
7565 pos_4 = get_cpu_unit_code ("1_4");
7566 pos_5 = get_cpu_unit_code ("1_5");
7567 pos_6 = get_cpu_unit_code ("1_6");
7568 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7569 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7570 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7571 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7572 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7573 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7574 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7575 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7576 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7577 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7578 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7579 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7580 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7581 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7582 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7583 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7584 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7585 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7586 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7587 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7589 schedule_ebbs (rtl_dump_file);
7590 finish_bundle_states ();
7591 if (ia64_tune == PROCESSOR_ITANIUM)
7597 emit_insn_group_barriers (rtl_dump_file);
7599 ia64_final_schedule = 0;
7600 timevar_pop (TV_SCHED2);
7603 emit_all_insn_group_barriers (rtl_dump_file);
/* A call must not be the last instruction in a function, so that the
   return address stays within the function and unwinding works
   properly.  Note that IA-64 differs from dwarf2 on this point.  */
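/* For illustration (an assumed sketch): if the function ends with a
   noreturn call, the code below appends

     ;;
     break.f 0
     ;;

   -- a stop bit, a break.f insn (cf. gen_break_f), and another stop
   bit -- so the return address can never point one past the end of
   the function.  */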
7608 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7613 insn = get_last_insn ();
7614 if (! INSN_P (insn))
7615 insn = prev_active_insn (insn);
7616 if (GET_CODE (insn) == INSN
7617 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7618 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7621 insn = prev_active_insn (insn);
7623 if (GET_CODE (insn) == CALL_INSN)
7626 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7627 emit_insn (gen_break_f ());
7628 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7633 emit_predicate_relation_info ();
7636 /* Return true if REGNO is used by the epilogue. */
7639 ia64_epilogue_uses (int regno)
7644 /* With a call to a function in another module, we will write a new
7645 value to "gp". After returning from such a call, we need to make
7646 sure the function restores the original gp-value, even if the
7647 function itself does not use the gp anymore. */
7648 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7650 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7651 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7652 /* For functions defined with the syscall_linkage attribute, all
7653 input registers are marked as live at all function exits. This
7654 prevents the register allocator from using the input registers,
7655 which in turn makes it possible to restart a system call after
7656 an interrupt without having to save/restore the input registers.
7657 This also prevents kernel data from leaking to application code. */
7658 return lookup_attribute ("syscall_linkage",
7659 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7662 /* Conditional return patterns can't represent the use of `b0' as
7663 the return address, so we force the value live this way. */
7667 /* Likewise for ar.pfs, which is used by br.ret. */
7675 /* Return true if REGNO is used by the frame unwinder. */
7678 ia64_eh_uses (int regno)
7680 if (! reload_completed)
7683 if (current_frame_info.reg_save_b0
7684 && regno == current_frame_info.reg_save_b0)
7686 if (current_frame_info.reg_save_pr
7687 && regno == current_frame_info.reg_save_pr)
7689 if (current_frame_info.reg_save_ar_pfs
7690 && regno == current_frame_info.reg_save_ar_pfs)
7692 if (current_frame_info.reg_save_ar_unat
7693 && regno == current_frame_info.reg_save_ar_unat)
7695 if (current_frame_info.reg_save_ar_lc
7696 && regno == current_frame_info.reg_save_ar_lc)
7702 /* Return true if this goes in small data/bss. */
/* ??? We could also support our own long data here: generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger,
   but should make the code faster because there is one less load.
   This also includes incomplete types which can't go in sdata/sbss.  */
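/* Schematically (an assumed sketch of the generated assembly), an
   object in sdata is reached gp-relative with a single load:

     addl r2 = @gprel(var), gp ;;
     ld8 r3 = [r2]

   while an ordinary object needs an extra load through the linkage
   table, as in the addl,ld8/ld8 sequence mentioned above:

     addl r2 = @ltoff(var), gp ;;
     ld8 r2 = [r2] ;;
     ld8 r3 = [r2]  */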
7710 ia64_in_small_data_p (tree exp)
7712 if (TARGET_NO_SDATA)
7715 /* We want to merge strings, so we never consider them small data. */
7716 if (TREE_CODE (exp) == STRING_CST)
7719 /* Functions are never small data. */
7720 if (TREE_CODE (exp) == FUNCTION_DECL)
7723 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7725 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7726 if (strcmp (section, ".sdata") == 0
7727 || strcmp (section, ".sbss") == 0)
7732 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7734 /* If this is an incomplete type with size 0, then we can't put it
7735 in sdata because it might be too big when completed. */
7736 if (size > 0 && size <= ia64_section_threshold)
7743 /* Output assembly directives for prologue regions. */
/* True if the current block is the last block of the function.  */
7747 static bool last_block;
7749 /* True if we need a copy_state command at the start of the next block. */
7751 static bool need_copy_state;
7753 /* The function emits unwind directives for the start of an epilogue. */
7756 process_epilogue (void)
7758 /* If this isn't the last block of the function, then we need to label the
7759 current state, and copy it back in at the start of the next block. */
7763 fprintf (asm_out_file, "\t.label_state 1\n");
7764 need_copy_state = true;
7767 fprintf (asm_out_file, "\t.restore sp\n");
7770 /* This function processes a SET pattern looking for specific patterns
7771 which result in emitting an assembly directive required for unwinding. */
7774 process_set (FILE *asm_out_file, rtx pat)
7776 rtx src = SET_SRC (pat);
7777 rtx dest = SET_DEST (pat);
7778 int src_regno, dest_regno;
7780 /* Look for the ALLOC insn. */
7781 if (GET_CODE (src) == UNSPEC_VOLATILE
7782 && XINT (src, 1) == UNSPECV_ALLOC
7783 && GET_CODE (dest) == REG)
7785 dest_regno = REGNO (dest);
7787 /* If this isn't the final destination for ar.pfs, the alloc
7788 shouldn't have been marked frame related. */
7789 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7792 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7793 ia64_dbx_register_number (dest_regno));
7797 /* Look for SP = .... */
7798 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7800 if (GET_CODE (src) == PLUS)
7802 rtx op0 = XEXP (src, 0);
7803 rtx op1 = XEXP (src, 1);
7804 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7806 if (INTVAL (op1) < 0)
7807 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7810 process_epilogue ();
7815 else if (GET_CODE (src) == REG
7816 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7817 process_epilogue ();
7824 /* Register move we need to look at. */
7825 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7827 src_regno = REGNO (src);
7828 dest_regno = REGNO (dest);
7833 /* Saving return address pointer. */
7834 if (dest_regno != current_frame_info.reg_save_b0)
7836 fprintf (asm_out_file, "\t.save rp, r%d\n",
7837 ia64_dbx_register_number (dest_regno));
7841 if (dest_regno != current_frame_info.reg_save_pr)
7843 fprintf (asm_out_file, "\t.save pr, r%d\n",
7844 ia64_dbx_register_number (dest_regno));
7847 case AR_UNAT_REGNUM:
7848 if (dest_regno != current_frame_info.reg_save_ar_unat)
7850 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7851 ia64_dbx_register_number (dest_regno));
7855 if (dest_regno != current_frame_info.reg_save_ar_lc)
7857 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7858 ia64_dbx_register_number (dest_regno));
7861 case STACK_POINTER_REGNUM:
7862 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7863 || ! frame_pointer_needed)
7865 fprintf (asm_out_file, "\t.vframe r%d\n",
7866 ia64_dbx_register_number (dest_regno));
7870 /* Everything else should indicate being stored to memory. */
7875 /* Memory store we need to look at. */
7876 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7882 if (GET_CODE (XEXP (dest, 0)) == REG)
7884 base = XEXP (dest, 0);
7887 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7888 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7890 base = XEXP (XEXP (dest, 0), 0);
7891 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7896 if (base == hard_frame_pointer_rtx)
7898 saveop = ".savepsp";
7901 else if (base == stack_pointer_rtx)
7906 src_regno = REGNO (src);
7910 if (current_frame_info.reg_save_b0 != 0)
7912 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7916 if (current_frame_info.reg_save_pr != 0)
7918 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7922 if (current_frame_info.reg_save_ar_lc != 0)
7924 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7928 if (current_frame_info.reg_save_ar_pfs != 0)
7930 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7933 case AR_UNAT_REGNUM:
7934 if (current_frame_info.reg_save_ar_unat != 0)
7936 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7943 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7944 1 << (src_regno - GR_REG (4)));
7952 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7953 1 << (src_regno - BR_REG (1)));
7960 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7961 1 << (src_regno - FR_REG (2)));
7964 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7965 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7966 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7967 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7968 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7969 1 << (src_regno - FR_REG (12)));
7981 /* This function looks at a single insn and emits any directives
7982 required to unwind this insn. */
7984 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7986 if (flag_unwind_tables
7987 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7991 if (GET_CODE (insn) == NOTE
7992 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7994 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7996 /* Restore unwind state from immediately before the epilogue. */
7997 if (need_copy_state)
7999 fprintf (asm_out_file, "\t.body\n");
8000 fprintf (asm_out_file, "\t.copy_state 1\n");
8001 need_copy_state = false;
8005 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8008 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8010 pat = XEXP (pat, 0);
8012 pat = PATTERN (insn);
8014 switch (GET_CODE (pat))
8017 process_set (asm_out_file, pat);
8023 int limit = XVECLEN (pat, 0);
8024 for (par_index = 0; par_index < limit; par_index++)
8026 rtx x = XVECEXP (pat, 0, par_index);
8027 if (GET_CODE (x) == SET)
8028 process_set (asm_out_file, x);
8041 ia64_init_builtins (void)
8043 tree psi_type_node = build_pointer_type (integer_type_node);
8044 tree pdi_type_node = build_pointer_type (long_integer_type_node);
8046 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
8047 tree si_ftype_psi_si_si
8048 = build_function_type_list (integer_type_node,
8049 psi_type_node, integer_type_node,
8050 integer_type_node, NULL_TREE);
8052 /* __sync_val_compare_and_swap_di */
8053 tree di_ftype_pdi_di_di
8054 = build_function_type_list (long_integer_type_node,
8055 pdi_type_node, long_integer_type_node,
8056 long_integer_type_node, NULL_TREE);
8057 /* __sync_bool_compare_and_swap_di */
8058 tree si_ftype_pdi_di_di
8059 = build_function_type_list (integer_type_node,
8060 pdi_type_node, long_integer_type_node,
8061 long_integer_type_node, NULL_TREE);
8062 /* __sync_synchronize */
8063 tree void_ftype_void
8064 = build_function_type (void_type_node, void_list_node);
8066 /* __sync_lock_test_and_set_si */
8067 tree si_ftype_psi_si
8068 = build_function_type_list (integer_type_node,
8069 psi_type_node, integer_type_node, NULL_TREE);
8071 /* __sync_lock_test_and_set_di */
8072 tree di_ftype_pdi_di
8073 = build_function_type_list (long_integer_type_node,
8074 pdi_type_node, long_integer_type_node,
8077 /* __sync_lock_release_si */
8079 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
8081 /* __sync_lock_release_di */
8083 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
8088 /* The __fpreg type. */
8089 fpreg_type = make_node (REAL_TYPE);
8090 /* ??? The back end should know to load/save __fpreg variables using
8091 the ldf.fill and stf.spill instructions. */
8092 TYPE_PRECISION (fpreg_type) = 96;
8093 layout_type (fpreg_type);
8094 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8096 /* The __float80 type. */
8097 float80_type = make_node (REAL_TYPE);
8098 TYPE_PRECISION (float80_type) = 96;
8099 layout_type (float80_type);
8100 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8102 /* The __float128 type. */
8105 tree float128_type = make_node (REAL_TYPE);
8106 TYPE_PRECISION (float128_type) = 128;
8107 layout_type (float128_type);
8108 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8111 /* Under HPUX, this is a synonym for "long double". */
8112 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8115 #define def_builtin(name, type, code) \
8116 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
8118 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
8119 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
8120 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
8121 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
8122 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
8123 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
8124 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
8125 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
8127 def_builtin ("__sync_synchronize", void_ftype_void,
8128 IA64_BUILTIN_SYNCHRONIZE);
8130 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8131 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8132 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8133 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8134 def_builtin ("__sync_lock_release_si", void_ftype_psi,
8135 IA64_BUILTIN_LOCK_RELEASE_SI);
8136 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8137 IA64_BUILTIN_LOCK_RELEASE_DI);
8139 def_builtin ("__builtin_ia64_bsp",
8140 build_function_type (ptr_type_node, void_list_node),
8143 def_builtin ("__builtin_ia64_flushrs",
8144 build_function_type (void_type_node, void_list_node),
8145 IA64_BUILTIN_FLUSHRS);
8147 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8148 IA64_BUILTIN_FETCH_AND_ADD_SI);
8149 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8150 IA64_BUILTIN_FETCH_AND_SUB_SI);
8151 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8152 IA64_BUILTIN_FETCH_AND_OR_SI);
8153 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8154 IA64_BUILTIN_FETCH_AND_AND_SI);
8155 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8156 IA64_BUILTIN_FETCH_AND_XOR_SI);
8157 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8158 IA64_BUILTIN_FETCH_AND_NAND_SI);
8160 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8161 IA64_BUILTIN_ADD_AND_FETCH_SI);
8162 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8163 IA64_BUILTIN_SUB_AND_FETCH_SI);
8164 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8165 IA64_BUILTIN_OR_AND_FETCH_SI);
8166 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8167 IA64_BUILTIN_AND_AND_FETCH_SI);
8168 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8169 IA64_BUILTIN_XOR_AND_FETCH_SI);
8170 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8171 IA64_BUILTIN_NAND_AND_FETCH_SI);
8173 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8174 IA64_BUILTIN_FETCH_AND_ADD_DI);
8175 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8176 IA64_BUILTIN_FETCH_AND_SUB_DI);
8177 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8178 IA64_BUILTIN_FETCH_AND_OR_DI);
8179 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8180 IA64_BUILTIN_FETCH_AND_AND_DI);
8181 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8182 IA64_BUILTIN_FETCH_AND_XOR_DI);
8183 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8184 IA64_BUILTIN_FETCH_AND_NAND_DI);
8186 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8187 IA64_BUILTIN_ADD_AND_FETCH_DI);
8188 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8189 IA64_BUILTIN_SUB_AND_FETCH_DI);
8190 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8191 IA64_BUILTIN_OR_AND_FETCH_DI);
8192 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8193 IA64_BUILTIN_AND_AND_FETCH_DI);
8194 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8195 IA64_BUILTIN_XOR_AND_FETCH_DI);
8196 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8197 IA64_BUILTIN_NAND_AND_FETCH_DI);
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/
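/* For example, a source-level call

     old = __sync_fetch_and_add_si (&counter, 1);

   reaches this routine with binoptab == add_optab and mode == SImode
   (counter and old are just illustrative names); this particular case
   is then handled by the fetchadd special case below instead of the
   cmpxchg loop.  */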
8215 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8216 tree arglist, rtx target)
8218 rtx ret, label, tmp, ccv, insn, mem, value;
8221 arg0 = TREE_VALUE (arglist);
8222 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8223 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8224 #ifdef POINTERS_EXTEND_UNSIGNED
8225 if (GET_MODE(mem) != Pmode)
8226 mem = convert_memory_address (Pmode, mem);
8228 value = expand_expr (arg1, NULL_RTX, mode, 0);
8230 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8231 MEM_VOLATILE_P (mem) = 1;
8233 if (target && register_operand (target, mode))
8236 ret = gen_reg_rtx (mode);
8238 emit_insn (gen_mf ());
8240 /* Special case for fetchadd instructions. */
8241 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8244 insn = gen_fetchadd_acq_si (ret, mem, value);
8246 insn = gen_fetchadd_acq_di (ret, mem, value);
8251 tmp = gen_reg_rtx (mode);
8252 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8253 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8254 emit_move_insn (tmp, mem);
8256 label = gen_label_rtx ();
8258 emit_move_insn (ret, tmp);
8259 convert_move (ccv, tmp, /*unsignedp=*/1);
/* Perform the specific operation.  NAND is special-cased: it is
   signalled by one_cmpl_optab and expanded as AND of the
   complemented value.  */
8263 if (binoptab == one_cmpl_optab)
8265 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8266 binoptab = and_optab;
8268 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8271 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8273 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8276 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/
8294 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8295 tree arglist, rtx target)
8297 rtx old, label, tmp, ret, ccv, insn, mem, value;
8300 arg0 = TREE_VALUE (arglist);
8301 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8302 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8303 #ifdef POINTERS_EXTEND_UNSIGNED
8304 if (GET_MODE(mem) != Pmode)
8305 mem = convert_memory_address (Pmode, mem);
8308 value = expand_expr (arg1, NULL_RTX, mode, 0);
8310 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8311 MEM_VOLATILE_P (mem) = 1;
8313 if (target && ! register_operand (target, mode))
8316 emit_insn (gen_mf ());
8317 tmp = gen_reg_rtx (mode);
8318 old = gen_reg_rtx (mode);
8319 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8320 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8322 emit_move_insn (tmp, mem);
8324 label = gen_label_rtx ();
8326 emit_move_insn (old, tmp);
8327 convert_move (ccv, tmp, /*unsignedp=*/1);
/* Perform the specific operation.  NAND is special-cased: it is
   signalled by one_cmpl_optab and expanded as AND of the
   complemented value.  */
8331 if (binoptab == one_cmpl_optab)
8333 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8334 binoptab = and_optab;
8336 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8339 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8341 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8344 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/
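/* E.g. __sync_bool_compare_and_swap_si (&word, old, new) arrives here
   with boolp != 0, mode == SImode and rmode the mode wanted for the
   truth value; the bool_ result is computed from ret == oldval by
   emit_store_flag_force below (word/old/new are illustrative names).  */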
8360 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8361 int boolp, tree arglist, rtx target)
8363 tree arg0, arg1, arg2;
8364 rtx mem, old, new, ccv, tmp, insn;
8366 arg0 = TREE_VALUE (arglist);
8367 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8368 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8369 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8370 old = expand_expr (arg1, NULL_RTX, mode, 0);
8371 new = expand_expr (arg2, NULL_RTX, mode, 0);
8373 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8374 MEM_VOLATILE_P (mem) = 1;
8376 if (GET_MODE (old) != mode)
8377 old = convert_to_mode (mode, old, /*unsignedp=*/1);
8378 if (GET_MODE (new) != mode)
8379 new = convert_to_mode (mode, new, /*unsignedp=*/1);
8381 if (! register_operand (old, mode))
8382 old = copy_to_mode_reg (mode, old);
8383 if (! register_operand (new, mode))
8384 new = copy_to_mode_reg (mode, new);
8386 if (! boolp && target && register_operand (target, mode))
8389 tmp = gen_reg_rtx (mode);
8391 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8392 convert_move (ccv, old, /*unsignedp=*/1);
8393 emit_insn (gen_mf ());
8395 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8397 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8403 target = gen_reg_rtx (rmode);
8404 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8410 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8413 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8417 rtx mem, new, ret, insn;
8419 arg0 = TREE_VALUE (arglist);
8420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8421 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8422 new = expand_expr (arg1, NULL_RTX, mode, 0);
8424 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8425 MEM_VOLATILE_P (mem) = 1;
8426 if (! register_operand (new, mode))
8427 new = copy_to_mode_reg (mode, new);
8429 if (target && register_operand (target, mode))
8432 ret = gen_reg_rtx (mode);
8435 insn = gen_xchgsi (ret, mem, new);
8437 insn = gen_xchgdi (ret, mem, new);
8443 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8446 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8447 rtx target ATTRIBUTE_UNUSED)
8452 arg0 = TREE_VALUE (arglist);
8453 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8455 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8456 MEM_VOLATILE_P (mem) = 1;
8458 emit_move_insn (mem, const0_rtx);
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  /* First pick up the operation mode implied by the function code,
     and for compare-and-swap the mode of the result as well.  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  /* Then expand it.  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
					   target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
					   target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
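/* Illustration only: taken together, the dispatch above provides the
   __sync-style atomic builtins for this port.  For example, assuming
   the registered builtin names, a ticket-counter increment:

     int ticket = __sync_fetch_and_add_si (&next_ticket, 1);

   returns the value before the addition, while the *_and_fetch
   variants return the value after it.  */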
/* On HP-UX, IA64 aggregate parameters are passed in the most
   significant bits of the stack slot.  */

static enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
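/* Illustration only: under the rule above, a small aggregate such as

     struct tag { char a, b, c; };

   passed on the HP-UX stack occupies the most significant end of its
   word-sized slot instead of the placement the default little-endian
   convention would give it.  */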
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next; /* next external */
  char *name; /* name of the external */
} *extern_func_head = 0;

static void
ia64_hpux_add_extern_decl (const char *name)
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}
/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
	{
	  if (decl)
	    TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      extern_func_head->name);
	  fputs (TYPE_ASM_OP, asm_out_file);
	  assemble_name (asm_out_file, extern_func_head->name);
	  putc (',', asm_out_file);
	  fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
	  putc ('\n', asm_out_file);
	}

      extern_func_head = extern_func_head->next;
    }
}
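/* Illustration only: for an external function `foo' that was
   referenced but never defined in this translation unit, the loop
   above produces assembler directives along the lines of

	.global foo
	.type foo,@function

   with the exact spelling determined by TYPE_ASM_OP and
   TYPE_OPERAND_FMT in the target headers.  */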
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
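/* Illustration only: once these libfuncs are registered, a TFmode
   (quad precision) operation such as

     long double f (long double x, long double y) { return x + y; }

   is lowered to a call to _U_Qfadd rather than the default __addtf3,
   matching HP-UX's quad-precision support library.  */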
/* Rename the division and modulus functions for VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}
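/* Illustration only: with these entries, 32-bit signed division such as

     int quot (int a, int b) { return a / b; }

   calls OTS$DIV_I from the OpenVMS run-time library instead of the
   default __divsi3.  */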
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
				 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this, insn, funexp;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, IN_REG (0));
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
	  if (CONST_OK_FOR_I (vcall_offset))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
						  vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
	{
	  if (!CONST_OK_FOR_J (vcall_offset))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
			gen_rtx_MEM (ptr_mode, tmp));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
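/* Illustration only: thunks like the one emitted above come from C++
   multiple inheritance, e.g.

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   Calling g through a B* that points into a C object enters a thunk
   that adjusts `this' by DELTA (the B-within-C offset), possibly adds
   an offset fetched from the vtable (VCALL_OFFSET), and then
   tail-calls C::g.  */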
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		       int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, GR_REG (8));
}
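/* Illustration only: because of the hook above, a function returning a
   large aggregate, e.g.

     struct big { long x[8]; };
     struct big make_big (void);

   receives the address of its return slot in r8 (GR_REG (8)), the
   IA-64 struct-return register.  */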
#include "gt-ia64.h"