/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
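
/* ??? For illustration: the -mtls-size= values map onto the IA-64 add
   immediates, so 14 allows TLS offsets to be formed with "adds" (14-bit
   immediate), the default of 22 uses "addl" (22-bit immediate), and 64
   requires materializing the offset with "movl".  */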

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorg.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int reg_save_gp;              /* save register for gp.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */
  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
                                         tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",           1, 1, true,  false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (rtx op, enum machine_mode mode)
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}

int
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return SYMBOL_REF_SMALL_ADDR_P (op);
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;
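      /* ??? Example: (const (plus (symbol_ref "x") (const_int 0x8000)))
         passes this test and may be loaded through a single GOT entry,
         while x+0x1234 is rejected here, forcing the low bits to be added
         outside the GOT load instead of creating a GOT entry per offset.  */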

    case SYMBOL_REF:
      if (SYMBOL_REF_SMALL_ADDR_P (op))
        return 0;
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Return 1 if OP refers to a function.  */

int
function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (rtx op, enum machine_mode mode)
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || gr_register_operand (op, mode));
}
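
/* ??? Reference for the CONST_OK_FOR_* letters used below, per their usual
   definitions in ia64.h: I is a 14 bit signed immediate ("adds"), J a
   22 bit signed immediate ("addl"), K an 8 bit signed immediate (logical
   instructions), L an 8 bit adjusted immediate (compares), and M a 6 bit
   unsigned immediate (shift counts).  */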

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need a constant that both the GT and LT forms accept.  */

int
gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || gr_register_operand (op, mode));
}
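
/* ??? Example of why both constraints are checked: the machine has an
   immediate form of cmp.lt but not of cmp.gt, so "r < 100" can be emitted
   directly while "r > 100" must later become "r >= 101".  The constant
   therefore has to be encodable both as-is (K) and after the +/-1
   adjustment (L).  */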

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)));
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < 32));
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}
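
/* ??? These values are exactly the increments that the fetchadd4/fetchadd8
   instructions can encode in their immediate field; any other atomic
   increment has to be expanded as a compare-and-exchange loop instead.  */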

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (rtx op, enum machine_mode mode)
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC);
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (register rtx op, enum machine_mode mode)
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! general_operand (op, mode))
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! destination_operand (op, mode))
    return 0;
  return 1;
}

/* Similarly.  */

int
xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (rtx op, enum machine_mode mode)
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     here.  */

  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl, decl);
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("%Jaddress area of '%s' conflicts with previous "
                 "declaration", decl, decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
             decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
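
/* ??? Worked example: rop = 0xff0 with rshift = 4 shifts down to 0xff,
   and exact_log2 (0xff + 1) = 8 is the field length.  A mask that is not
   a solid block, say 0x1010 >> 4 = 0x101, gives 0x102, which is not a
   power of two, so exact_log2 returns -1 and the match fails.  */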

/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
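      /* ??? Numeric example: ofs = 0x2345 yields
         lo = ((0x2345 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1cbb and
         hi = ofs - lo = 0x4000, so we load sym+0x4000 and add the
         sign-extended 14-bit remainder with a single adddi3.  */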
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
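
/* ??? Note: r13 is the fixed thread pointer (tp) register in the IA-64
   software conventions, which is why it is hard-coded above.  */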

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
        }
      else
        emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
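
/* ??? Summary of the expansions above: global-dynamic calls
   __tls_get_addr with the symbol's dtpmod and dtprel GOT entries;
   local-dynamic makes one such call for the module base (dtprel 0) and
   adds each symbol's dtprel offset; initial-exec loads the tprel offset
   from the GOT and adds it to the thread pointer; local-exec adds a
   link-time tprel constant to the thread pointer directly.  */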

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
        {
          ia64_expand_load_address (op0, op1);
          return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            if (reversed || dead) abort ();
            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            if (reversed || dead) abort ();
            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            if (reversed || dead) abort ();
            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
              abort ();
            else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
              {
                /* Again the postmodify cannot be made to match, but
                   in this case it's more efficient to get rid of the
                   postmodify entirely and fix up with an add insn.  */
                out[1] = adjust_automodify_address (in, DImode, base, 8);
                fixup = gen_adddi3 (base, base,
                                    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
              }
            else
              {
                /* Combined offset still fits in the displacement field.
                   (We cannot overflow it at the high end.)  */
                out[1] = adjust_automodify_address
                  (in, DImode,
                   gen_rtx_POST_MODIFY (Pmode, base,
                     gen_rtx_PLUS (Pmode, base,
                       GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                   8);
              }
            break;

          default:
            abort ();
          }
        break;
      }

    default:
      abort ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)			\
  if (GET_CODE (EXP) == MEM					\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY		\
          || GET_CODE (XEXP (EXP, 0)) == POST_INC		\
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))		\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,		\
                                          XEXP (XEXP (EXP, 0), 0),	\
                                          REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
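
/* ??? Example of the resulting split: after reload,
       (set (reg:TI r16) (mem:TI (reg:DI r14)))
   becomes
       ld8 r16 = [r14], 8	(POST_INC, carries a REG_INC note)
       ld8 r17 = [r14], -8	(POST_DEC restoring r14; omitted if dead)
   and the two halves are emitted in reversed order when the pointer
   overlaps the destination pair.  */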

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;
      if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
        abort ();
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
          /* Relational operators raise FP_INVALID when given
             an SNaN operand.  */
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
        default: abort ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     op0, TFmode, op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
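
/* ??? Usage note: the branch and scc expanders in ia64.md feed the
   (code cmp 0) expression returned here directly into their patterns, so
   from this point on every condition is a BImode predicate register
   tested against zero.  */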

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
      else
        {
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
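
/* ??? With the usual IA-64 conventions, where p1-p5 and p16-p63 are
   call-saved, the loop above emits
	.pred.safe_across_calls p1-p5,p16-p63
   naming the predicate ranges the assembler may assume are preserved
   across calls.  */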

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */
2016 ia64_compute_frame_size (HOST_WIDE_INT size)
2018 HOST_WIDE_INT total_size;
2019 HOST_WIDE_INT spill_size = 0;
2020 HOST_WIDE_INT extra_spill_size = 0;
2021 HOST_WIDE_INT pretend_args_size;
2024 int spilled_gr_p = 0;
2025 int spilled_fr_p = 0;
2029 if (current_frame_info.initialized)
2032 memset (¤t_frame_info, 0, sizeof current_frame_info);
2033 CLEAR_HARD_REG_SET (mask);
2035 /* Don't allocate scratches to the return register. */
2036 diddle_return_value (mark_reg_gr_used_mask, NULL);
2038 /* Don't allocate scratches to the EH scratch registers. */
2039 if (cfun->machine->ia64_eh_epilogue_sp)
2040 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2041 if (cfun->machine->ia64_eh_epilogue_bsp)
2042 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2044 /* Find the size of the register stack frame. We have only 80 local
2045 registers, because we reserve 8 for the inputs and 8 for the
2048 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2049 since we'll be adjusting that down later. */
2050 regno = LOC_REG (78) + ! frame_pointer_needed;
2051 for (; regno >= LOC_REG (0); regno--)
2052 if (regs_ever_live[regno])
2054 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2056 /* For functions marked with the syscall_linkage attribute, we must mark
2057 all eight input registers as in use, so that locals aren't visible to
2060 if (cfun->machine->n_varargs > 0
2061 || lookup_attribute ("syscall_linkage",
2062 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2063 current_frame_info.n_input_regs = 8;
2066 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2067 if (regs_ever_live[regno])
2069 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2072 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2073 if (regs_ever_live[regno])
2075 i = regno - OUT_REG (0) + 1;
2077 /* When -p profiling, we need one output register for the mcount argument.
2078 Likewise for -a profiling for the bb_init_func argument. For -ax
2079 profiling, we need two output registers for the two bb_init_trace_func
2081 if (current_function_profile)
2083 current_frame_info.n_output_regs = i;
2085 /* ??? No rotating register support yet. */
2086 current_frame_info.n_rotate_regs = 0;
2088 /* Discover which registers need spilling, and how much room that
2089 will take. Begin with floating point and general registers,
2090 which will always wind up on the stack. */
2092 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2093 if (regs_ever_live[regno] && ! call_used_regs[regno])
2095 SET_HARD_REG_BIT (mask, regno);
2101 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2102 if (regs_ever_live[regno] && ! call_used_regs[regno])
2104 SET_HARD_REG_BIT (mask, regno);
2110 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2111 if (regs_ever_live[regno] && ! call_used_regs[regno])
2113 SET_HARD_REG_BIT (mask, regno);
2118 /* Now come all special registers that might get saved in other
2119 general registers. */
2121 if (frame_pointer_needed)
2123 current_frame_info.reg_fp = find_gr_spill (1);
2124 /* If we did not get a register, then we take LOC79. This is guaranteed
2125 to be free, even if regs_ever_live is already set, because this is
2126 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2127 as we don't count loc79 above. */
2128 if (current_frame_info.reg_fp == 0)
2130 current_frame_info.reg_fp = LOC_REG (79);
2131 current_frame_info.n_local_regs++;
2135 if (! current_function_is_leaf)
2137 /* Emit a save of BR0 if we call other functions. Do this even
2138 if this function doesn't return, as EH depends on this to be
2139 able to unwind the stack. */
2140 SET_HARD_REG_BIT (mask, BR_REG (0));
2142 current_frame_info.reg_save_b0 = find_gr_spill (1);
2143 if (current_frame_info.reg_save_b0 == 0)
2149 /* Similarly for ar.pfs. */
2150 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2151 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2152 if (current_frame_info.reg_save_ar_pfs == 0)
2154 extra_spill_size += 8;
2158 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2159 registers are clobbered, so we fall back to the stack. */
2160 current_frame_info.reg_save_gp
2161 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2162 if (current_frame_info.reg_save_gp == 0)
2164 SET_HARD_REG_BIT (mask, GR_REG (1));
2171 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2173 SET_HARD_REG_BIT (mask, BR_REG (0));
2178 if (regs_ever_live[AR_PFS_REGNUM])
2180 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2181 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2182 if (current_frame_info.reg_save_ar_pfs == 0)
2184 extra_spill_size += 8;
2190 /* Unwind descriptor hackery: things are most efficient if we allocate
2191 consecutive GR save registers for RP, PFS, FP in that order. However,
2192 it is absolutely critical that FP get the only hard register that's
2193 guaranteed to be free, so we allocate it first. If all three did
2194 happen to be allocated hard regs, and are consecutive, rearrange them
2195 into the preferred order now. */
2196 if (current_frame_info.reg_fp != 0
2197 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2198 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2200 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2201 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2202 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2205 /* See if we need to store the predicate register block. */
2206 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2207 if (regs_ever_live[regno] && ! call_used_regs[regno])
2209 if (regno <= PR_REG (63))
2211 SET_HARD_REG_BIT (mask, PR_REG (0));
2212 current_frame_info.reg_save_pr = find_gr_spill (1);
2213 if (current_frame_info.reg_save_pr == 0)
2215 extra_spill_size += 8;
2219 /* ??? Mark them all as used so that register renaming and such
2220 are free to use them. */
2221 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2222 regs_ever_live[regno] = 1;
2225 /* If we're forced to use st8.spill, we're forced to save and restore
2226 ar.unat as well. The check for existing liveness allows inline asm
2227 to touch ar.unat. */
2228 if (spilled_gr_p || cfun->machine->n_varargs
2229 || regs_ever_live[AR_UNAT_REGNUM])
2231 regs_ever_live[AR_UNAT_REGNUM] = 1;
2232 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2233 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2234 if (current_frame_info.reg_save_ar_unat == 0)
2236 extra_spill_size += 8;
2241 if (regs_ever_live[AR_LC_REGNUM])
2243 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2244 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2245 if (current_frame_info.reg_save_ar_lc == 0)
2247 extra_spill_size += 8;
2252 /* If we have an odd number of words of pretend arguments written to
2253 the stack, then the FR save area will be unaligned. We round the
2254 size of this area up to keep things 16 byte aligned. */
2255 if (spilled_fr_p)
2256 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2257 else
2258 pretend_args_size = current_function_pretend_args_size;
2260 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2261 + current_function_outgoing_args_size);
2262 total_size = IA64_STACK_ALIGN (total_size);
2264 /* We always use the 16-byte scratch area provided by the caller, but
2265 if we are a leaf function, there's no one to which we need to provide
2266 a scratch area. */
2267 if (current_function_is_leaf)
2268 total_size = MAX (0, total_size - 16);
2270 current_frame_info.total_size = total_size;
2271 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2272 current_frame_info.spill_size = spill_size;
2273 current_frame_info.extra_spill_size = extra_spill_size;
2274 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2275 current_frame_info.n_spilled = n_spilled;
2276 current_frame_info.initialized = reload_completed;
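/* Worked example of the arithmetic above (editor's sketch, hypothetical
   numbers, never compiled): 40 bytes of locals plus two 8-byte GR spills
   in a non-leaf function with no pretend or outgoing args.  */
#if 0
  total_size = IA64_STACK_ALIGN (16 + 0 + 40 + 0 + 0);	/* == 64 */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);		/* 48 for a leaf */
#endif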
2279 /* Compute the initial difference between the specified pair of registers. */
2282 ia64_initial_elimination_offset (int from, int to)
2284 HOST_WIDE_INT offset;
2286 ia64_compute_frame_size (get_frame_size ());
2289 case FRAME_POINTER_REGNUM:
2290 if (to == HARD_FRAME_POINTER_REGNUM)
2292 if (current_function_is_leaf)
2293 offset = -current_frame_info.total_size;
2295 offset = -(current_frame_info.total_size
2296 - current_function_outgoing_args_size - 16);
2298 else if (to == STACK_POINTER_REGNUM)
2300 if (current_function_is_leaf)
2303 offset = 16 + current_function_outgoing_args_size;
2309 case ARG_POINTER_REGNUM:
2310 /* Arguments start above the 16 byte save area, unless stdarg,
2311 in which case we store through the 16 byte save area. */
2312 if (to == HARD_FRAME_POINTER_REGNUM)
2313 offset = 16 - current_function_pretend_args_size;
2314 else if (to == STACK_POINTER_REGNUM)
2315 offset = (current_frame_info.total_size
2316 + 16 - current_function_pretend_args_size);
2328 /* If there are more than a trivial number of register spills, we use
2329 two interleaved iterators so that we can get two memory references
2330 per insn group.
2332 In order to simplify things in the prologue and epilogue expanders,
2333 we use helper functions to fix up the memory references after the
2334 fact with the appropriate offsets to a POST_MODIFY memory mode.
2335 The following data structure tracks the state of the two iterators
2336 while insns are being emitted. */
2338 struct spill_fill_data
2340 rtx init_after; /* point at which to emit initializations */
2341 rtx init_reg[2]; /* initial base register */
2342 rtx iter_reg[2]; /* the iterator registers */
2343 rtx *prev_addr[2]; /* address of last memory use */
2344 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2345 HOST_WIDE_INT prev_off[2]; /* last offset */
2346 int n_iter; /* number of iterators in use */
2347 int next_iter; /* next iterator to use */
2348 unsigned int save_gr_used_mask;
2351 static struct spill_fill_data spill_fill_data;
2354 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2358 spill_fill_data.init_after = get_last_insn ();
2359 spill_fill_data.init_reg[0] = init_reg;
2360 spill_fill_data.init_reg[1] = init_reg;
2361 spill_fill_data.prev_addr[0] = NULL;
2362 spill_fill_data.prev_addr[1] = NULL;
2363 spill_fill_data.prev_insn[0] = NULL;
2364 spill_fill_data.prev_insn[1] = NULL;
2365 spill_fill_data.prev_off[0] = cfa_off;
2366 spill_fill_data.prev_off[1] = cfa_off;
2367 spill_fill_data.next_iter = 0;
2368 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2370 spill_fill_data.n_iter = 1 + (n_spills > 2);
2371 for (i = 0; i < spill_fill_data.n_iter; ++i)
2373 int regno = next_scratch_gr_reg ();
2374 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2375 current_frame_info.gr_used_mask |= 1 << regno;
2380 finish_spill_pointers (void)
2382 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
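/* Editor's sketch of the intended call pattern (mirroring the prologue
   and epilogue expanders further down; SOME_REG and CFA_OFF are
   placeholders, and the block is never compiled):  */
#if 0
  setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 0);
  do_spill (gen_movdi_x, some_reg, cfa_off, some_reg);	/* emit one save */
  do_restore (gen_movdi_x, some_reg, cfa_off);		/* or one reload */
  finish_spill_pointers ();	/* hand the iterator scratch regs back */
#endif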
2386 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2388 int iter = spill_fill_data.next_iter;
2389 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2390 rtx disp_rtx = GEN_INT (disp);
2393 if (spill_fill_data.prev_addr[iter])
2395 if (CONST_OK_FOR_N (disp))
2397 *spill_fill_data.prev_addr[iter]
2398 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2399 gen_rtx_PLUS (DImode,
2400 spill_fill_data.iter_reg[iter],
2402 REG_NOTES (spill_fill_data.prev_insn[iter])
2403 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2404 REG_NOTES (spill_fill_data.prev_insn[iter]));
2408 /* ??? Could use register post_modify for loads. */
2409 if (! CONST_OK_FOR_I (disp))
2411 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2412 emit_move_insn (tmp, disp_rtx);
2415 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2416 spill_fill_data.iter_reg[iter], disp_rtx));
2419 /* Micro-optimization: if we've created a frame pointer, it's at
2420 CFA 0, which may allow the real iterator to be initialized lower,
2421 slightly increasing parallelism. Also, if there are few saves
2422 it may eliminate the iterator entirely. */
2424 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2425 && frame_pointer_needed)
2427 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2428 set_mem_alias_set (mem, get_varargs_alias_set ());
2436 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2437 spill_fill_data.init_reg[iter]);
2442 if (! CONST_OK_FOR_I (disp))
2444 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2445 emit_move_insn (tmp, disp_rtx);
2449 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2450 spill_fill_data.init_reg[iter],
2457 /* Careful for being the first insn in a sequence. */
2458 if (spill_fill_data.init_after)
2459 insn = emit_insn_after (seq, spill_fill_data.init_after);
2462 rtx first = get_insns ();
2464 insn = emit_insn_before (seq, first);
2466 insn = emit_insn (seq);
2468 spill_fill_data.init_after = insn;
2470 /* If DISP is 0, we may or may not have a further adjustment
2471 afterward. If we do, then the load/store insn may be modified
2472 to be a post-modify. If we don't, then this copy may be
2473 eliminated by copyprop_hardreg_forward, which makes this
2474 insn garbage, which runs afoul of the sanity check in
2475 propagate_one_insn. So mark this insn as legal to delete. */
2477 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2481 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2483 /* ??? Not all of the spills are for varargs, but some of them are.
2484 The rest of the spills belong in an alias set of their own. But
2485 it doesn't actually hurt to include them here. */
2486 set_mem_alias_set (mem, get_varargs_alias_set ());
2488 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2489 spill_fill_data.prev_off[iter] = cfa_off;
2491 if (++iter >= spill_fill_data.n_iter)
2493 spill_fill_data.next_iter = iter;
2499 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2502 int iter = spill_fill_data.next_iter;
2505 mem = spill_restore_mem (reg, cfa_off);
2506 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2507 spill_fill_data.prev_insn[iter] = insn;
2514 RTX_FRAME_RELATED_P (insn) = 1;
2516 /* Don't even pretend that the unwind code can intuit its way
2517 through a pair of interleaved post_modify iterators. Just
2518 provide the correct answer. */
2520 if (frame_pointer_needed)
2522 base = hard_frame_pointer_rtx;
2527 base = stack_pointer_rtx;
2528 off = current_frame_info.total_size - cfa_off;
2532 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2533 gen_rtx_SET (VOIDmode,
2534 gen_rtx_MEM (GET_MODE (reg),
2535 plus_constant (base, off)),
2542 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2544 int iter = spill_fill_data.next_iter;
2547 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2548 GEN_INT (cfa_off)));
2549 spill_fill_data.prev_insn[iter] = insn;
2552 /* Wrapper functions that discard the CONST_INT spill offset. These
2553 exist so that we can give gr_spill/gr_fill the offset they need and
2554 use a consistent function interface. */
2557 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2559 return gen_movdi (dest, src);
2563 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2565 return gen_fr_spill (dest, src);
2569 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2571 return gen_fr_restore (dest, src);
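/* Editor's illustration (never compiled): the uniform three-operand
   signature lets do_spill drive GR moves and FR spills through the same
   function pointer; GR_TMP, FR_TMP and OFF are placeholders.  */
#if 0
  do_spill (gen_movdi_x, gr_tmp, off, gr_tmp);	  /* offset discarded */
  do_spill (gen_fr_spill_x, fr_tmp, off, fr_tmp); /* offset discarded too */
#endif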
2574 /* Called after register allocation to add any instructions needed for the
2575 prologue. Using a prologue insn is favored compared to putting all of the
2576 instructions in output_function_prologue(), since it allows the scheduler
2577 to intermix instructions with the saves of the caller saved registers. In
2578 some cases, it might be necessary to emit a barrier instruction as the last
2579 insn to prevent such scheduling.
2581 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2582 so that the debug info generation code can handle them properly.
2584 The register save area is laid out like so:
2586 [ varargs spill area ]
2587 [ fr register spill area ]
2588 [ br register spill area ]
2589 [ ar register spill area ]
2590 [ pr register spill area ]
2591 [ gr register spill area ] */
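/* Editor's note on how the expanders walk this layout: cfa_off starts at
   spill_cfa_off + spill_size + extra_spill_size (just above the pr/ar
   slots) and each save steps it down by 8, or by 16 for the 16-byte
   aligned FR saves.  Sketch (never compiled):  */
#if 0
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);
  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  cfa_off -= 8;		/* next 8-byte slot, moving toward spill_cfa_off */
#endif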
2593 /* ??? Get inefficient code when the frame size is larger than can fit in an
2594 adds instruction. */
2597 ia64_expand_prologue (void)
2599 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2600 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2603 ia64_compute_frame_size (get_frame_size ());
2604 last_scratch_gr_reg = 15;
2606 /* If there is no epilogue, then we don't need some prologue insns.
2607 We need to avoid emitting the dead prologue insns, because flow
2608 will complain about them. */
2613 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2614 if ((e->flags & EDGE_FAKE) == 0
2615 && (e->flags & EDGE_FALLTHRU) != 0)
2617 epilogue_p = (e != NULL);
2622 /* Set the local, input, and output register names. We need to do this
2623 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2624 half. If we use in/loc/out register names, then we get assembler errors
2625 in crtn.S because there is no alloc insn or regstk directive in there. */
2626 if (! TARGET_REG_NAMES)
2628 int inputs = current_frame_info.n_input_regs;
2629 int locals = current_frame_info.n_local_regs;
2630 int outputs = current_frame_info.n_output_regs;
2632 for (i = 0; i < inputs; i++)
2633 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2634 for (i = 0; i < locals; i++)
2635 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2636 for (i = 0; i < outputs; i++)
2637 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2640 /* Set the frame pointer register name. The regnum is logically loc79,
2641 but of course we'll not have allocated that many locals. Rather than
2642 worrying about renumbering the existing rtxs, we adjust the name. */
2643 /* ??? This code means that we can never use one local register when
2644 there is a frame pointer. loc79 gets wasted in this case, as it is
2645 renamed to a register that will never be used. See also the try_locals
2646 code in find_gr_spill. */
2647 if (current_frame_info.reg_fp)
2649 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2650 reg_names[HARD_FRAME_POINTER_REGNUM]
2651 = reg_names[current_frame_info.reg_fp];
2652 reg_names[current_frame_info.reg_fp] = tmp;
2655 /* We don't need an alloc instruction if we've used no outputs or locals. */
2656 if (current_frame_info.n_local_regs == 0
2657 && current_frame_info.n_output_regs == 0
2658 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2659 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2661 /* If there is no alloc, but there are input registers used, then we
2662 need a .regstk directive. */
2663 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2664 ar_pfs_save_reg = NULL_RTX;
2668 current_frame_info.need_regstk = 0;
2670 if (current_frame_info.reg_save_ar_pfs)
2671 regno = current_frame_info.reg_save_ar_pfs;
2673 regno = next_scratch_gr_reg ();
2674 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2676 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2677 GEN_INT (current_frame_info.n_input_regs),
2678 GEN_INT (current_frame_info.n_local_regs),
2679 GEN_INT (current_frame_info.n_output_regs),
2680 GEN_INT (current_frame_info.n_rotate_regs)));
2681 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2684 /* Set up frame pointer, stack pointer, and spill iterators. */
2686 n_varargs = cfun->machine->n_varargs;
2687 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2688 stack_pointer_rtx, 0);
2690 if (frame_pointer_needed)
2692 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2693 RTX_FRAME_RELATED_P (insn) = 1;
2696 if (current_frame_info.total_size != 0)
2698 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2701 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2702 offset = frame_size_rtx;
2705 regno = next_scratch_gr_reg ();
2706 offset = gen_rtx_REG (DImode, regno);
2707 emit_move_insn (offset, frame_size_rtx);
2710 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2711 stack_pointer_rtx, offset));
2713 if (! frame_pointer_needed)
2715 RTX_FRAME_RELATED_P (insn) = 1;
2716 if (GET_CODE (offset) != CONST_INT)
2719 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2720 gen_rtx_SET (VOIDmode,
2722 gen_rtx_PLUS (DImode,
2729 /* ??? At this point we must generate a magic insn that appears to
2730 modify the stack pointer, the frame pointer, and all spill
2731 iterators. This would allow the most scheduling freedom. For
2732 now, just hard stop. */
2733 emit_insn (gen_blockage ());
2736 /* Must copy out ar.unat before doing any integer spills. */
2737 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2739 if (current_frame_info.reg_save_ar_unat)
2741 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2744 alt_regno = next_scratch_gr_reg ();
2745 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2746 current_frame_info.gr_used_mask |= 1 << alt_regno;
2749 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2750 insn = emit_move_insn (ar_unat_save_reg, reg);
2751 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2753 /* Even if we're not going to generate an epilogue, we still
2754 need to save the register so that EH works. */
2755 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2756 emit_insn (gen_prologue_use (ar_unat_save_reg));
2759 ar_unat_save_reg = NULL_RTX;
2761 /* Spill all varargs registers. Do this before spilling any GR registers,
2762 since we want the UNAT bits for the GR registers to override the UNAT
2763 bits from varargs, which we don't care about. */
2766 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2768 reg = gen_rtx_REG (DImode, regno);
2769 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2772 /* Locate the bottom of the register save area. */
2773 cfa_off = (current_frame_info.spill_cfa_off
2774 + current_frame_info.spill_size
2775 + current_frame_info.extra_spill_size);
2777 /* Save the predicate register block either in a register or in memory. */
2778 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2780 reg = gen_rtx_REG (DImode, PR_REG (0));
2781 if (current_frame_info.reg_save_pr != 0)
2783 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2784 insn = emit_move_insn (alt_reg, reg);
2786 /* ??? Denote pr spill/fill by a DImode move that modifies all
2787 64 hard registers. */
2788 RTX_FRAME_RELATED_P (insn) = 1;
2790 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2791 gen_rtx_SET (VOIDmode, alt_reg, reg),
2794 /* Even if we're not going to generate an epilogue, we still
2795 need to save the register so that EH works. */
2797 emit_insn (gen_prologue_use (alt_reg));
2801 alt_regno = next_scratch_gr_reg ();
2802 alt_reg = gen_rtx_REG (DImode, alt_regno);
2803 insn = emit_move_insn (alt_reg, reg);
2804 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2809 /* Handle AR regs in numerical order. All of them get special handling. */
2810 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2811 && current_frame_info.reg_save_ar_unat == 0)
2813 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2814 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2818 /* The alloc insn already copied ar.pfs into a general register. The
2819 only thing we have to do now is copy that register to a stack slot
2820 if we'd not allocated a local register for the job. */
2821 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2822 && current_frame_info.reg_save_ar_pfs == 0)
2824 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2825 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2829 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2831 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2832 if (current_frame_info.reg_save_ar_lc != 0)
2834 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2835 insn = emit_move_insn (alt_reg, reg);
2836 RTX_FRAME_RELATED_P (insn) = 1;
2838 /* Even if we're not going to generate an epilogue, we still
2839 need to save the register so that EH works. */
2841 emit_insn (gen_prologue_use (alt_reg));
2845 alt_regno = next_scratch_gr_reg ();
2846 alt_reg = gen_rtx_REG (DImode, alt_regno);
2847 emit_move_insn (alt_reg, reg);
2848 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2853 if (current_frame_info.reg_save_gp)
2855 insn = emit_move_insn (gen_rtx_REG (DImode,
2856 current_frame_info.reg_save_gp),
2857 pic_offset_table_rtx);
2858 /* We don't know for sure yet if this is actually needed, since
2859 we've not split the PIC call patterns. If all of the calls
2860 are indirect, and not followed by any uses of the gp, then
2861 this save is dead. Allow it to go away. */
2863 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2866 /* We should now be at the base of the gr/br/fr spill area. */
2867 if (cfa_off != (current_frame_info.spill_cfa_off
2868 + current_frame_info.spill_size))
2871 /* Spill all general registers. */
2872 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2873 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2875 reg = gen_rtx_REG (DImode, regno);
2876 do_spill (gen_gr_spill, reg, cfa_off, reg);
2880 /* Handle BR0 specially -- it may be getting stored permanently in
2881 some GR register. */
2882 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2884 reg = gen_rtx_REG (DImode, BR_REG (0));
2885 if (current_frame_info.reg_save_b0 != 0)
2887 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2888 insn = emit_move_insn (alt_reg, reg);
2889 RTX_FRAME_RELATED_P (insn) = 1;
2891 /* Even if we're not going to generate an epilogue, we still
2892 need to save the register so that EH works. */
2894 emit_insn (gen_prologue_use (alt_reg));
2898 alt_regno = next_scratch_gr_reg ();
2899 alt_reg = gen_rtx_REG (DImode, alt_regno);
2900 emit_move_insn (alt_reg, reg);
2901 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2906 /* Spill the rest of the BR registers. */
2907 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2908 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2910 alt_regno = next_scratch_gr_reg ();
2911 alt_reg = gen_rtx_REG (DImode, alt_regno);
2912 reg = gen_rtx_REG (DImode, regno);
2913 emit_move_insn (alt_reg, reg);
2914 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2918 /* Align the frame and spill all FR registers. */
2919 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2920 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2924 reg = gen_rtx_REG (XFmode, regno);
2925 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2929 if (cfa_off != current_frame_info.spill_cfa_off)
2932 finish_spill_pointers ();
2935 /* Called after register allocation to add any instructions needed for the
2936 epilogue. Using an epilogue insn is favored compared to putting all of the
2937 instructions in output_function_epilogue(), since it allows the scheduler
2938 to intermix instructions with the restores of the caller saved registers. In
2939 some cases, it might be necessary to emit a barrier instruction as the last
2940 insn to prevent such scheduling. */
2943 ia64_expand_epilogue (int sibcall_p)
2945 rtx insn, reg, alt_reg, ar_unat_save_reg;
2946 int regno, alt_regno, cfa_off;
2948 ia64_compute_frame_size (get_frame_size ());
2950 /* If there is a frame pointer, then we use it instead of the stack
2951 pointer, so that the stack pointer does not need to be valid when
2952 the epilogue starts. See EXIT_IGNORE_STACK. */
2953 if (frame_pointer_needed)
2954 setup_spill_pointers (current_frame_info.n_spilled,
2955 hard_frame_pointer_rtx, 0);
2957 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2958 current_frame_info.total_size);
2960 if (current_frame_info.total_size != 0)
2962 /* ??? At this point we must generate a magic insn that appears to
2963 modify the spill iterators and the frame pointer. This would
2964 allow the most scheduling freedom. For now, just hard stop. */
2965 emit_insn (gen_blockage ());
2968 /* Locate the bottom of the register save area. */
2969 cfa_off = (current_frame_info.spill_cfa_off
2970 + current_frame_info.spill_size
2971 + current_frame_info.extra_spill_size);
2973 /* Restore the predicate registers. */
2974 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2976 if (current_frame_info.reg_save_pr != 0)
2977 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2980 alt_regno = next_scratch_gr_reg ();
2981 alt_reg = gen_rtx_REG (DImode, alt_regno);
2982 do_restore (gen_movdi_x, alt_reg, cfa_off);
2985 reg = gen_rtx_REG (DImode, PR_REG (0));
2986 emit_move_insn (reg, alt_reg);
2989 /* Restore the application registers. */
2991 /* Load the saved unat from the stack, but do not restore it until
2992 after the GRs have been restored. */
2993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2995 if (current_frame_info.reg_save_ar_unat != 0)
2997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3000 alt_regno = next_scratch_gr_reg ();
3001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3002 current_frame_info.gr_used_mask |= 1 << alt_regno;
3003 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3008 ar_unat_save_reg = NULL_RTX;
3010 if (current_frame_info.reg_save_ar_pfs != 0)
3012 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3013 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3014 emit_move_insn (reg, alt_reg);
3016 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3018 alt_regno = next_scratch_gr_reg ();
3019 alt_reg = gen_rtx_REG (DImode, alt_regno);
3020 do_restore (gen_movdi_x, alt_reg, cfa_off);
3022 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3023 emit_move_insn (reg, alt_reg);
3026 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3028 if (current_frame_info.reg_save_ar_lc != 0)
3029 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3032 alt_regno = next_scratch_gr_reg ();
3033 alt_reg = gen_rtx_REG (DImode, alt_regno);
3034 do_restore (gen_movdi_x, alt_reg, cfa_off);
3037 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3038 emit_move_insn (reg, alt_reg);
3041 /* We should now be at the base of the gr/br/fr spill area. */
3042 if (cfa_off != (current_frame_info.spill_cfa_off
3043 + current_frame_info.spill_size))
3046 /* The GP may be stored on the stack in the prologue, but it's
3047 never restored in the epilogue. Skip the stack slot. */
3048 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3051 /* Restore all general registers. */
3052 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3053 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3055 reg = gen_rtx_REG (DImode, regno);
3056 do_restore (gen_gr_restore, reg, cfa_off);
3060 /* Restore the branch registers. Handle B0 specially, as it may
3061 have gotten stored in some GR register. */
3062 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3064 if (current_frame_info.reg_save_b0 != 0)
3065 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3068 alt_regno = next_scratch_gr_reg ();
3069 alt_reg = gen_rtx_REG (DImode, alt_regno);
3070 do_restore (gen_movdi_x, alt_reg, cfa_off);
3073 reg = gen_rtx_REG (DImode, BR_REG (0));
3074 emit_move_insn (reg, alt_reg);
3077 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3078 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3080 alt_regno = next_scratch_gr_reg ();
3081 alt_reg = gen_rtx_REG (DImode, alt_regno);
3082 do_restore (gen_movdi_x, alt_reg, cfa_off);
3084 reg = gen_rtx_REG (DImode, regno);
3085 emit_move_insn (reg, alt_reg);
3088 /* Restore floating point registers. */
3089 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3090 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3094 reg = gen_rtx_REG (XFmode, regno);
3095 do_restore (gen_fr_restore_x, reg, cfa_off);
3099 /* Restore ar.unat for real. */
3100 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3102 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3103 emit_move_insn (reg, ar_unat_save_reg);
3106 if (cfa_off != current_frame_info.spill_cfa_off)
3109 finish_spill_pointers ();
3111 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3113 /* ??? At this point we must generate a magic insn that appears to
3114 modify the spill iterators, the stack pointer, and the frame
3115 pointer. This would allow the most scheduling freedom. For now,
3117 emit_insn (gen_blockage ());
3120 if (cfun->machine->ia64_eh_epilogue_sp)
3121 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3122 else if (frame_pointer_needed)
3124 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3125 RTX_FRAME_RELATED_P (insn) = 1;
3127 else if (current_frame_info.total_size)
3129 rtx offset, frame_size_rtx;
3131 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3132 if (CONST_OK_FOR_I (current_frame_info.total_size))
3133 offset = frame_size_rtx;
3136 regno = next_scratch_gr_reg ();
3137 offset = gen_rtx_REG (DImode, regno);
3138 emit_move_insn (offset, frame_size_rtx);
3141 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3144 RTX_FRAME_RELATED_P (insn) = 1;
3145 if (GET_CODE (offset) != CONST_INT)
3148 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3149 gen_rtx_SET (VOIDmode,
3151 gen_rtx_PLUS (DImode,
3158 if (cfun->machine->ia64_eh_epilogue_bsp)
3159 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3162 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3165 int fp = GR_REG (2);
3166 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3167 first available call-clobbered register. If there was a frame_pointer
3168 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3169 so we have to make sure we're using the string "r2" when emitting
3170 the register name for the assembler. */
3171 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3172 fp = HARD_FRAME_POINTER_REGNUM;
3174 /* We must emit an alloc to force the input registers to become output
3175 registers. Otherwise, if the callee tries to pass its parameters
3176 through to another call without an intervening alloc, then these
3178 /* ??? We don't need to preserve all input registers. We only need to
3179 preserve those input registers used as arguments to the sibling call.
3180 It is unclear how to compute that number here. */
3181 if (current_frame_info.n_input_regs != 0)
3182 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3183 const0_rtx, const0_rtx,
3184 GEN_INT (current_frame_info.n_input_regs),
3189 /* Return 1 if br.ret can do all the work required to return from a
3190 function. */
3193 ia64_direct_return (void)
3195 if (reload_completed && ! frame_pointer_needed)
3197 ia64_compute_frame_size (get_frame_size ());
3199 return (current_frame_info.total_size == 0
3200 && current_frame_info.n_spilled == 0
3201 && current_frame_info.reg_save_b0 == 0
3202 && current_frame_info.reg_save_pr == 0
3203 && current_frame_info.reg_save_ar_pfs == 0
3204 && current_frame_info.reg_save_ar_unat == 0
3205 && current_frame_info.reg_save_ar_lc == 0);
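/* Editor's example (never compiled): a register-only leaf like the one
   below has a zero-size frame and spills nothing, so all of the fields
   tested above are zero and the epilogue is a bare br.ret.  */
#if 0
int add (int a, int b) { return a + b; }
#endif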
3210 /* Return the magic cookie that we use to hold the return address
3211 during early compilation. */
3214 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3218 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3221 /* Split this value after reload, now that we know where the return
3222 address is saved. */
3225 ia64_split_return_addr_rtx (rtx dest)
3229 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3231 if (current_frame_info.reg_save_b0 != 0)
3232 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3238 /* Compute offset from CFA for BR0. */
3239 /* ??? Must be kept in sync with ia64_expand_prologue. */
3240 off = (current_frame_info.spill_cfa_off
3241 + current_frame_info.spill_size);
3242 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3243 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3246 /* Convert CFA offset to a register based offset. */
3247 if (frame_pointer_needed)
3248 src = hard_frame_pointer_rtx;
3251 src = stack_pointer_rtx;
3252 off += current_frame_info.total_size;
3255 /* Load address into scratch register. */
3256 if (CONST_OK_FOR_I (off))
3257 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3260 emit_move_insn (dest, GEN_INT (off));
3261 emit_insn (gen_adddi3 (dest, src, dest));
3264 src = gen_rtx_MEM (Pmode, dest);
3268 src = gen_rtx_REG (DImode, BR_REG (0));
3270 emit_move_insn (dest, src);
3274 ia64_hard_regno_rename_ok (int from, int to)
3276 /* Don't clobber any of the registers we reserved for the prologue. */
3277 if (to == current_frame_info.reg_fp
3278 || to == current_frame_info.reg_save_b0
3279 || to == current_frame_info.reg_save_pr
3280 || to == current_frame_info.reg_save_ar_pfs
3281 || to == current_frame_info.reg_save_ar_unat
3282 || to == current_frame_info.reg_save_ar_lc)
3285 if (from == current_frame_info.reg_fp
3286 || from == current_frame_info.reg_save_b0
3287 || from == current_frame_info.reg_save_pr
3288 || from == current_frame_info.reg_save_ar_pfs
3289 || from == current_frame_info.reg_save_ar_unat
3290 || from == current_frame_info.reg_save_ar_lc)
3293 /* Don't use output registers outside the register frame. */
3294 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3297 /* Retain even/oddness on predicate register pairs. */
3298 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3299 return (from & 1) == (to & 1);
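/* Editor's illustration (never compiled): the port keeps a condition and
   its complement in adjacent predicate registers, so renaming must keep
   parity (this reading of the check above is the editor's).  */
#if 0
  ia64_hard_regno_rename_ok (PR_REG (6), PR_REG (8));	/* 1: even -> even */
  ia64_hard_regno_rename_ok (PR_REG (6), PR_REG (7));	/* 0: parity flips */
#endif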
3304 /* Target hook for assembling integer objects. Handle word-sized
3305 aligned objects and detect the cases when @fptr is needed. */
3308 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3310 if (size == POINTER_SIZE / BITS_PER_UNIT
3312 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3313 && GET_CODE (x) == SYMBOL_REF
3314 && SYMBOL_REF_FUNCTION_P (x))
3316 if (POINTER_SIZE == 32)
3317 fputs ("\tdata4\t@fptr(", asm_out_file);
3319 fputs ("\tdata8\t@fptr(", asm_out_file);
3320 output_addr_const (asm_out_file, x);
3321 fputs (")\n", asm_out_file);
3324 return default_assemble_integer (x, size, aligned_p);
3327 /* Emit the function prologue. */
3330 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3332 int mask, grsave, grsave_prev;
3334 if (current_frame_info.need_regstk)
3335 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3336 current_frame_info.n_input_regs,
3337 current_frame_info.n_local_regs,
3338 current_frame_info.n_output_regs,
3339 current_frame_info.n_rotate_regs);
3341 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3344 /* Emit the .prologue directive. */
3347 grsave = grsave_prev = 0;
3348 if (current_frame_info.reg_save_b0 != 0)
3351 grsave = grsave_prev = current_frame_info.reg_save_b0;
3353 if (current_frame_info.reg_save_ar_pfs != 0
3354 && (grsave_prev == 0
3355 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3358 if (grsave_prev == 0)
3359 grsave = current_frame_info.reg_save_ar_pfs;
3360 grsave_prev = current_frame_info.reg_save_ar_pfs;
3362 if (current_frame_info.reg_fp != 0
3363 && (grsave_prev == 0
3364 || current_frame_info.reg_fp == grsave_prev + 1))
3367 if (grsave_prev == 0)
3368 grsave = HARD_FRAME_POINTER_REGNUM;
3369 grsave_prev = current_frame_info.reg_fp;
3371 if (current_frame_info.reg_save_pr != 0
3372 && (grsave_prev == 0
3373 || current_frame_info.reg_save_pr == grsave_prev + 1))
3376 if (grsave_prev == 0)
3377 grsave = current_frame_info.reg_save_pr;
3380 if (mask && TARGET_GNU_AS)
3381 fprintf (file, "\t.prologue %d, %d\n", mask,
3382 ia64_dbx_register_number (grsave));
3384 fputs ("\t.prologue\n", file);
3386 /* Emit a .spill directive, if necessary, to relocate the base of
3387 the register spill area. */
3388 if (current_frame_info.spill_cfa_off != -16)
3389 fprintf (file, "\t.spill %ld\n",
3390 (long) (current_frame_info.spill_cfa_off
3391 + current_frame_info.spill_size));
3394 /* Emit the .body directive at the scheduled end of the prologue. */
3397 ia64_output_function_end_prologue (FILE *file)
3399 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3402 fputs ("\t.body\n", file);
3405 /* Emit the function epilogue. */
3408 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3409 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3413 if (current_frame_info.reg_fp)
3415 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3416 reg_names[HARD_FRAME_POINTER_REGNUM]
3417 = reg_names[current_frame_info.reg_fp];
3418 reg_names[current_frame_info.reg_fp] = tmp;
3420 if (! TARGET_REG_NAMES)
3422 for (i = 0; i < current_frame_info.n_input_regs; i++)
3423 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3424 for (i = 0; i < current_frame_info.n_local_regs; i++)
3425 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3426 for (i = 0; i < current_frame_info.n_output_regs; i++)
3427 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3430 current_frame_info.initialized = 0;
3434 ia64_dbx_register_number (int regno)
3436 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3437 from its home at loc79 to something inside the register frame. We
3438 must perform the same renumbering here for the debug info. */
3439 if (current_frame_info.reg_fp)
3441 if (regno == HARD_FRAME_POINTER_REGNUM)
3442 regno = current_frame_info.reg_fp;
3443 else if (regno == current_frame_info.reg_fp)
3444 regno = HARD_FRAME_POINTER_REGNUM;
3447 if (IN_REGNO_P (regno))
3448 return 32 + regno - IN_REG (0);
3449 else if (LOC_REGNO_P (regno))
3450 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3451 else if (OUT_REGNO_P (regno))
3452 return (32 + current_frame_info.n_input_regs
3453 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3459 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3461 rtx addr_reg, eight = GEN_INT (8);
3463 /* The Intel assembler requires that the global __ia64_trampoline symbol
3464 be declared explicitly. */
3467 static bool declared_ia64_trampoline = false;
3469 if (!declared_ia64_trampoline)
3471 declared_ia64_trampoline = true;
3472 (*targetm.asm_out.globalize_label) (asm_out_file,
3473 "__ia64_trampoline");
3477 /* Load up our iterator. */
3478 addr_reg = gen_reg_rtx (Pmode);
3479 emit_move_insn (addr_reg, addr);
3481 /* The first two words are the fake descriptor:
3482 __ia64_trampoline, ADDR+16. */
3483 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3484 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3485 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3487 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3488 copy_to_reg (plus_constant (addr, 16)));
3489 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3491 /* The third word is the target descriptor. */
3492 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3493 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3495 /* The fourth word is the static chain. */
3496 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3499 /* Do any needed setup for a variadic function. CUM has not been updated
3500 for the last named argument, which has type TYPE and mode MODE.
3502 We generate the actual spill instructions during prologue generation. */
3505 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3506 tree type, int * pretend_size,
3507 int second_time ATTRIBUTE_UNUSED)
3509 CUMULATIVE_ARGS next_cum = *cum;
3511 /* Skip the current argument. */
3512 ia64_function_arg_advance (&next_cum, mode, type, 1);
3514 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3516 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3517 *pretend_size = n * UNITS_PER_WORD;
3518 cfun->machine->n_varargs = n;
3522 /* Check whether TYPE is a homogeneous floating point aggregate. If
3523 it is, return the mode of the floating point type that appears
3524 in all leaves. If it is not, return VOIDmode.
3526 An aggregate is a homogeneous floating point aggregate if all
3527 fields/elements in it have the same floating point type (e.g.,
3528 SFmode). 128-bit quad-precision floats are excluded. */
3530 static enum machine_mode
3531 hfa_element_mode (tree type, int nested)
3533 enum machine_mode element_mode = VOIDmode;
3534 enum machine_mode mode;
3535 enum tree_code code = TREE_CODE (type);
3536 int know_element_mode = 0;
3541 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3542 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3543 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3544 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3548 /* Fortran complex types are supposed to be HFAs, so we need to handle
3549 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3552 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3553 && TYPE_MODE (type) != TCmode)
3554 return GET_MODE_INNER (TYPE_MODE (type));
3559 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3560 mode if this is contained within an aggregate. */
3561 if (nested && TYPE_MODE (type) != TFmode)
3562 return TYPE_MODE (type);
3567 return hfa_element_mode (TREE_TYPE (type), 1);
3571 case QUAL_UNION_TYPE:
3572 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3574 if (TREE_CODE (t) != FIELD_DECL)
3577 mode = hfa_element_mode (TREE_TYPE (t), 1);
3578 if (know_element_mode)
3580 if (mode != element_mode)
3583 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3587 know_element_mode = 1;
3588 element_mode = mode;
3591 return element_mode;
3594 /* If we reach here, we probably have some front-end specific type
3595 that the backend doesn't know about. This can happen via the
3596 aggregate_value_p call in init_function_start. All we can do is
3597 ignore unknown tree types. */
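/* Editor's examples of the classification above (never compiled):  */
#if 0
struct hfa3  { float x, y, z; };	/* HFA, element mode SFmode */
struct cplxd { double re, im; };	/* HFA, element mode DFmode */
struct mixed { double d; float f; };	/* mixed leaves -> VOIDmode */
#endif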
3604 /* Return the number of words required to hold a quantity of TYPE and MODE
3605 when passed as an argument. */
3607 ia64_function_arg_words (tree type, enum machine_mode mode)
3611 if (mode == BLKmode)
3612 words = int_size_in_bytes (type);
3614 words = GET_MODE_SIZE (mode);
3616 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
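/* Editor's example: a 12-byte BLKmode struct occupies
   (12 + 8 - 1) / 8 == 2 argument words, while an SFmode scalar still
   rounds up to one full word.  */
#if 0
  /* ia64_function_arg_words (t12, BLKmode) == 2 for a 12-byte type;
     ia64_function_arg_words (t, SFmode)    == 1.  */
#endif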
3619 /* Return the number of registers that should be skipped so the current
3620 argument (described by TYPE and WORDS) will be properly aligned.
3622 Integer and float arguments larger than 8 bytes start at the next
3623 even boundary. Aggregates larger than 8 bytes start at the next
3624 even boundary if the aggregate has 16 byte alignment. Note that
3625 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3626 but are still to be aligned in registers.
3628 ??? The ABI does not specify how to handle aggregates with
3629 alignment from 9 to 15 bytes, or greater than 16. We handle them
3630 all as if they had 16 byte alignment. Such aggregates can occur
3631 only if gcc extensions are used. */
3633 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3635 if ((cum->words & 1) == 0)
3639 && TREE_CODE (type) != INTEGER_TYPE
3640 && TREE_CODE (type) != REAL_TYPE)
3641 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
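/* Editor's example: a TFmode argument is 2 words, so if it would start
   in an odd slot (cum->words == 3, say) the function returns 1 and the
   argument is pushed to the next even slot; from an even slot it
   returns 0.  */
#if 0
  /* cum->words == 3, TFmode -> offset 1 (slot 3 is skipped)
     cum->words == 4, TFmode -> offset 0  */
#endif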
3646 /* Return rtx for register where argument is passed, or zero if it is passed
3648 /* ??? 128-bit quad-precision floats are always passed in general
3652 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3653 int named, int incoming)
3655 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3656 int words = ia64_function_arg_words (type, mode);
3657 int offset = ia64_function_arg_offset (cum, type, words);
3658 enum machine_mode hfa_mode = VOIDmode;
3660 /* If all argument slots are used, then it must go on the stack. */
3661 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3664 /* Check for and handle homogeneous FP aggregates. */
3666 hfa_mode = hfa_element_mode (type, 0);
3668 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3669 and unprototyped hfas are passed specially. */
3670 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3674 int fp_regs = cum->fp_regs;
3675 int int_regs = cum->words + offset;
3676 int hfa_size = GET_MODE_SIZE (hfa_mode);
3680 /* If prototyped, pass it in FR regs then GR regs.
3681 If not prototyped, pass it in both FR and GR regs.
3683 If this is an SFmode aggregate, then it is possible to run out of
3684 FR regs while GR regs are still left. In that case, we pass the
3685 remaining part in the GR regs. */
3687 /* Fill the FP regs. We do this always. We stop if we reach the end
3688 of the argument, the last FP register, or the last argument slot. */
3690 byte_size = ((mode == BLKmode)
3691 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3692 args_byte_size = int_regs * UNITS_PER_WORD;
3694 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3695 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3697 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3698 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3702 args_byte_size += hfa_size;
3706 /* If no prototype, then the whole thing must go in GR regs. */
3707 if (! cum->prototype)
3709 /* If this is an SFmode aggregate, then we might have some left over
3710 that needs to go in GR regs. */
3711 else if (byte_size != offset)
3712 int_regs += offset / UNITS_PER_WORD;
3714 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3716 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3718 enum machine_mode gr_mode = DImode;
3719 unsigned int gr_size;
3721 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3722 then this goes in a GR reg left adjusted/little endian, right
3723 adjusted/big endian. */
3724 /* ??? Currently this is handled wrong, because 4-byte hunks are
3725 always right adjusted/little endian. */
3728 /* If we have an even 4 byte hunk because the aggregate is a
3729 multiple of 4 bytes in size, then this goes in a GR reg right
3730 adjusted/little endian. */
3731 else if (byte_size - offset == 4)
3734 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3735 gen_rtx_REG (gr_mode, (basereg
3739 gr_size = GET_MODE_SIZE (gr_mode);
3741 if (gr_size == UNITS_PER_WORD
3742 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3744 else if (gr_size > UNITS_PER_WORD)
3745 int_regs += gr_size / UNITS_PER_WORD;
3748 /* If we ended up using just one location, just return that one loc, but
3749 change the mode back to the argument mode. However, we can't do this
3750 when hfa_mode is XFmode and mode is TImode. In that case, we would
3751 return a TImode reference to an FP reg, but FP regs can't hold TImode.
3752 We need the PARALLEL to make this work. This can happen for a union
3753 containing a single __float80 member. */
3754 if (i == 1 && ! (hfa_mode == XFmode && mode == TImode))
3755 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3757 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3760 /* Integral and aggregates go in general registers. If we have run out of
3761 FR registers, then FP values must also go in general registers. This can
3762 happen when we have an SFmode HFA. */
3763 else if (mode == TFmode || mode == TCmode
3764 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3766 int byte_size = ((mode == BLKmode)
3767 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3768 if (BYTES_BIG_ENDIAN
3769 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3770 && byte_size < UNITS_PER_WORD
3773 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3774 gen_rtx_REG (DImode,
3775 (basereg + cum->words
3778 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3781 return gen_rtx_REG (mode, basereg + cum->words + offset);
3785 /* If there is a prototype, then FP values go in a FR register when
3786 named, and in a GR register when unnamed. */
3787 else if (cum->prototype)
3790 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3791 /* In big-endian mode, an anonymous SFmode value must be represented
3792 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3793 the value into the high half of the general register. */
3794 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3795 return gen_rtx_PARALLEL (mode,
3797 gen_rtx_EXPR_LIST (VOIDmode,
3798 gen_rtx_REG (DImode, basereg + cum->words + offset),
3801 return gen_rtx_REG (mode, basereg + cum->words + offset);
3803 /* If there is no prototype, then FP values go in both FR and GR
3804 registers. */
3807 /* See comment above. */
3808 enum machine_mode inner_mode =
3809 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3811 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3812 gen_rtx_REG (mode, (FR_ARG_FIRST
3815 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3816 gen_rtx_REG (inner_mode,
3817 (basereg + cum->words
3821 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3825 /* Return the number of words, at the beginning of the argument, that must be
3826 put in registers. 0 if the argument is entirely in registers or entirely
3827 in memory. */
3830 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3831 tree type, int named ATTRIBUTE_UNUSED)
3833 int words = ia64_function_arg_words (type, mode);
3834 int offset = ia64_function_arg_offset (cum, type, words);
3836 /* If all argument slots are used, then it must go on the stack. */
3837 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3840 /* It doesn't matter whether the argument goes in FR or GR regs. If
3841 it fits within the 8 argument slots, then it goes entirely in
3842 registers. If it extends past the last argument slot, then the rest
3843 goes on the stack. */
3845 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3848 return MAX_ARGUMENT_SLOTS - cum->words - offset;
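/* Editor's example: a 4-word aggregate arriving at slot 6 overruns the 8
   argument slots, so 2 words travel in registers and 2 on the stack.  */
#if 0
  /* cum->words == 6, offset == 0, words == 4:
     6 + 0 < 8 and 4 + 6 + 0 > 8  ->  returns 8 - 6 - 0 == 2.  */
#endif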
3851 /* Update CUM to point after this argument. This is patterned after
3852 ia64_function_arg. */
3855 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3856 tree type, int named)
3858 int words = ia64_function_arg_words (type, mode);
3859 int offset = ia64_function_arg_offset (cum, type, words);
3860 enum machine_mode hfa_mode = VOIDmode;
3862 /* If all arg slots are already full, then there is nothing to do. */
3863 if (cum->words >= MAX_ARGUMENT_SLOTS)
3866 cum->words += words + offset;
3868 /* Check for and handle homogeneous FP aggregates. */
3870 hfa_mode = hfa_element_mode (type, 0);
3872 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3873 and unprototyped hfas are passed specially. */
3874 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3876 int fp_regs = cum->fp_regs;
3877 /* This is the original value of cum->words + offset. */
3878 int int_regs = cum->words - words;
3879 int hfa_size = GET_MODE_SIZE (hfa_mode);
3883 /* If prototyped, pass it in FR regs then GR regs.
3884 If not prototyped, pass it in both FR and GR regs.
3886 If this is an SFmode aggregate, then it is possible to run out of
3887 FR regs while GR regs are still left. In that case, we pass the
3888 remaining part in the GR regs. */
3890 /* Fill the FP regs. We do this always. We stop if we reach the end
3891 of the argument, the last FP register, or the last argument slot. */
3893 byte_size = ((mode == BLKmode)
3894 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3895 args_byte_size = int_regs * UNITS_PER_WORD;
3897 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3898 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3901 args_byte_size += hfa_size;
3905 cum->fp_regs = fp_regs;
3908 /* Integral and aggregates go in general registers. If we have run out of
3909 FR registers, then FP values must also go in general registers. This can
3910 happen when we have an SFmode HFA. */
3911 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3912 cum->int_regs = cum->words;
3914 /* If there is a prototype, then FP values go in a FR register when
3915 named, and in a GR register when unnamed. */
3916 else if (cum->prototype)
3919 cum->int_regs = cum->words;
3921 /* ??? Complex types should not reach here. */
3922 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3924 /* If there is no prototype, then FP values go in both FR and GR
3925 registers. */
3928 /* ??? Complex types should not reach here. */
3929 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3930 cum->int_regs = cum->words;
3934 /* Variable sized types are passed by reference. */
3935 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3938 ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3939 enum machine_mode mode ATTRIBUTE_UNUSED,
3940 tree type, int named ATTRIBUTE_UNUSED)
3942 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3945 /* True if it is OK to do sibling call optimization for the specified
3946 call expression EXP. DECL will be the called function, or NULL if
3947 this is an indirect call. */
3949 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3951 /* We must always return with our current GP. This means we can
3952 only sibcall to functions defined in the current module. */
3953 return decl && (*targetm.binds_local_p) (decl);
3957 /* Implement va_arg. */
3960 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3962 /* Variable sized types are passed by reference. */
3963 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3965 tree ptrtype = build_pointer_type (type);
3966 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3967 return build_fold_indirect_ref (addr);
3970 /* Aggregate arguments with alignment larger than 8 bytes start at
3971 the next even boundary. Integer and floating point arguments
3972 do so if they are larger than 8 bytes, whether or not they are
3973 also aligned larger than 8 bytes. */
3974 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3975 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3977 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3978 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3979 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3980 build_int_2 (-2 * UNITS_PER_WORD, -1));
3981 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3982 gimplify_and_add (t, pre_p);
3985 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
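/* Editor's sketch in plain C (never compiled) of the round-up the code
   above gimplifies for >8-byte scalars and over-aligned aggregates;
   with UNITS_PER_WORD == 8 this is (valist + 15) & -16:  */
#if 0
  valist = (valist + 2 * UNITS_PER_WORD - 1) & -(2 * UNITS_PER_WORD);
#endif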
3988 /* Return 1 if the function return value is returned in memory. Return 0
3989 if it is in a register. */
3992 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3994 enum machine_mode mode;
3995 enum machine_mode hfa_mode;
3996 HOST_WIDE_INT byte_size;
3998 mode = TYPE_MODE (valtype);
3999 byte_size = GET_MODE_SIZE (mode);
4000 if (mode == BLKmode)
4002 byte_size = int_size_in_bytes (valtype);
4007 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4009 hfa_mode = hfa_element_mode (valtype, 0);
4010 if (hfa_mode != VOIDmode)
4012 int hfa_size = GET_MODE_SIZE (hfa_mode);
4014 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4019 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4025 /* Return rtx for register that holds the function return value. */
4028 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4030 enum machine_mode mode;
4031 enum machine_mode hfa_mode;
4033 mode = TYPE_MODE (valtype);
4034 hfa_mode = hfa_element_mode (valtype, 0);
4036 if (hfa_mode != VOIDmode)
4044 hfa_size = GET_MODE_SIZE (hfa_mode);
4045 byte_size = ((mode == BLKmode)
4046 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4048 for (i = 0; offset < byte_size; i++)
4050 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4051 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4057 return XEXP (loc[0], 0);
4059 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4061 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4062 return gen_rtx_REG (mode, FR_ARG_FIRST);
4065 if (BYTES_BIG_ENDIAN
4066 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4074 bytesize = int_size_in_bytes (valtype);
4075 for (i = 0; offset < bytesize; i++)
4077 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4078 gen_rtx_REG (DImode,
4081 offset += UNITS_PER_WORD;
4083 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4086 return gen_rtx_REG (mode, GR_RET_FIRST);
4090 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4091 We need to emit DTP-relative relocations. */
4094 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4098 fputs ("\tdata8.ua\t@dtprel(", file);
4099 output_addr_const (file, x);
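/* Illustration: for a hypothetical thread-local symbol `foo', the directive
   emitted here is

       data8.ua	@dtprel(foo)

   which the linker resolves to foo's offset within its module's
   thread-local storage block.  */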
4103 /* Print a memory address as an operand to reference that memory location. */
4105 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4106 also call this from ia64_print_operand for memory addresses. */
4109 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4110 rtx address ATTRIBUTE_UNUSED)
4114 /* Print an operand to an assembler instruction.
4115 C Swap and print a comparison operator.
4116 D Print an FP comparison operator.
4117 E Print 32 - constant, for SImode shifts as extract.
4118 e Print 64 - constant, for DImode rotates.
4119 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4120 a floating point register emitted normally.
4121 I Invert a predicate register by adding 1.
4122 J Select the proper predicate register for a condition.
4123 j Select the inverse predicate register for a condition.
4124 O Append .acq for volatile load.
4125 P Postincrement of a MEM.
4126 Q Append .rel for volatile store.
4127 S Shift amount for shladd instruction.
4128 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4129 for Intel assembler.
4130 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4131 for Intel assembler.
4132 r Print register name, or constant 0 as r0. HP compatibility for
4135 ia64_print_operand (FILE * file, rtx x, int code)
4142 /* Handled below. */
4147 enum rtx_code c = swap_condition (GET_CODE (x));
4148 fputs (GET_RTX_NAME (c), file);
4153 switch (GET_CODE (x))
4165 str = GET_RTX_NAME (GET_CODE (x));
4172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4176 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4180 if (x == CONST0_RTX (GET_MODE (x)))
4181 str = reg_names [FR_REG (0)];
4182 else if (x == CONST1_RTX (GET_MODE (x)))
4183 str = reg_names [FR_REG (1)];
4184 else if (GET_CODE (x) == REG)
4185 str = reg_names [REGNO (x)];
4192 fputs (reg_names [REGNO (x) + 1], file);
4198 unsigned int regno = REGNO (XEXP (x, 0));
4199 if (GET_CODE (x) == EQ)
4203 fputs (reg_names [regno], file);
4208 if (MEM_VOLATILE_P (x))
4209 fputs(".acq", file);
4214 HOST_WIDE_INT value;
4216 switch (GET_CODE (XEXP (x, 0)))
4222 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4223 if (GET_CODE (x) == CONST_INT)
4225 else if (GET_CODE (x) == REG)
4227 fprintf (file, ", %s", reg_names[REGNO (x)]);
4235 value = GET_MODE_SIZE (GET_MODE (x));
4239 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4243 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4248 if (MEM_VOLATILE_P (x))
4249 fputs(".rel", file);
4253 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4257 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4259 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4265 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4267 const char *prefix = "0x";
4268 if (INTVAL (x) & 0x80000000)
4270 fprintf (file, "0xffffffff");
4273 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
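/* Illustration: for the constant -1, the code above prints
   0xffffffffffffffff, spelling out the 64-bit sign extension explicitly
   for the Intel assembler.  */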
4279 /* If this operand is the constant zero, write it as register zero.
4280 Any register, zero, or CONST_INT value is OK here. */
4281 if (GET_CODE (x) == REG)
4282 fputs (reg_names[REGNO (x)], file);
4283 else if (x == CONST0_RTX (GET_MODE (x)))
4285 else if (GET_CODE (x) == CONST_INT)
4286 output_addr_const (file, x);
4288 output_operand_lossage ("invalid %%r value");
4295 /* For conditional branches, returns or calls, substitute
4296 sptk, dptk, dpnt, or spnt for %s. */
4297 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4300 int pred_val = INTVAL (XEXP (x, 0));
4302 /* Guess top and bottom 10% statically predicted. */
4303 if (pred_val < REG_BR_PROB_BASE / 50)
4305 else if (pred_val < REG_BR_PROB_BASE / 2)
4307 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4312 else if (GET_CODE (current_output_insn) == CALL_INSN)
4317 fputs (which, file);
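/* Illustration: these completers are the IA-64 branch hints; the first
   letter selects static vs. dynamic prediction, the suffix taken vs.
   not-taken:

       .sptk  static,  taken        .spnt  static,  not-taken
       .dptk  dynamic, taken        .dpnt  dynamic, not-taken  */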
4322 x = current_insn_predicate;
4325 unsigned int regno = REGNO (XEXP (x, 0));
4326 if (GET_CODE (x) == EQ)
4328 fprintf (file, "(%s) ", reg_names [regno]);
4333 output_operand_lossage ("ia64_print_operand: unknown code");
4337 switch (GET_CODE (x))
4339 /* This happens for the spill/restore instructions. */
4344 /* ... fall through ... */
4347 fputs (reg_names [REGNO (x)], file);
4352 rtx addr = XEXP (x, 0);
4353 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4354 addr = XEXP (addr, 0);
4355 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4360 output_addr_const (file, x);
4367 /* Compute a (partial) cost for rtx X. Return true if the complete
4368 cost has been computed, and false if subexpressions should be
4369 scanned. In either case, *TOTAL contains the cost result. */
4370 /* ??? This is incomplete. */
4373 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4381 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4384 if (CONST_OK_FOR_I (INTVAL (x)))
4386 else if (CONST_OK_FOR_J (INTVAL (x)))
4389 *total = COSTS_N_INSNS (1);
4392 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4395 *total = COSTS_N_INSNS (1);
4400 *total = COSTS_N_INSNS (1);
4406 *total = COSTS_N_INSNS (3);
4410 /* For multiplies wider than HImode, we have to go to the FPU,
4411 which normally involves copies. Plus there's the latency
4412 of the multiply itself, and the latency of the instructions to
4413 transfer integer regs to FP regs. */
4414 /* ??? Check for FP mode. */
4415 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4416 *total = COSTS_N_INSNS (10);
4418 *total = COSTS_N_INSNS (2);
4426 *total = COSTS_N_INSNS (1);
4433 /* We make divide expensive, so that divide-by-constant will be
4434 optimized to a multiply. */
4435 *total = COSTS_N_INSNS (60);
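/* Illustration: with division this expensive, expand always prefers the
   multiply-based expansion of a division by a constant; e.g. x / 10 is
   synthesized as a widening multiply by a reciprocal "magic" constant
   followed by shifts, which is far cheaper than the cost of 60 above.  */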
4443 /* Calculate the cost of moving data from a register in class FROM to
4444 one in class TO, using MODE. */
4447 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4450 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4451 if (to == ADDL_REGS)
4453 if (from == ADDL_REGS)
4456 /* All costs are symmetric, so reduce cases by putting the
4457 lower number class as the destination. */
4460 enum reg_class tmp = to;
4461 to = from, from = tmp;
4464 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4465 so that we get secondary memory reloads. Between FR_REGS,
4466 we have to make this at least as expensive as MEMORY_MOVE_COST
4467 to avoid spectacularly poor register class preferencing. */
4470 if (to != GR_REGS || from != GR_REGS)
4471 return MEMORY_MOVE_COST (mode, to, 0);
4479 /* Moving between PR registers takes two insns. */
4480 if (from == PR_REGS)
4482 /* Moving between PR and anything but GR is impossible. */
4483 if (from != GR_REGS)
4484 return MEMORY_MOVE_COST (mode, to, 0);
4488 /* Moving between BR and anything but GR is impossible. */
4489 if (from != GR_REGS && from != GR_AND_BR_REGS)
4490 return MEMORY_MOVE_COST (mode, to, 0);
4495 /* Moving between AR and anything but GR is impossible. */
4496 if (from != GR_REGS)
4497 return MEMORY_MOVE_COST (mode, to, 0);
4502 case GR_AND_FR_REGS:
4503 case GR_AND_BR_REGS:
4514 /* This function returns the register class required for a secondary
4515 register when copying between one of the registers in CLASS and X,
4516 using MODE. A return value of NO_REGS means that no secondary register
4520 ia64_secondary_reload_class (enum reg_class class,
4521 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4525 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4526 regno = true_regnum (x);
4533 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4534 interaction. We end up with two pseudos with overlapping lifetimes
4535 both of which are equiv to the same constant, and both of which need
4536 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4537 changes depending on the path length, which means the qty_first_reg
4538 check in make_regs_eqv can give different answers at different times.
4539 At some point I'll probably need a reload_indi pattern to handle
4542 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4543 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4544 non-general registers for good measure. */
4545 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4548 /* This is needed if a pseudo used as a call_operand gets spilled to a
4550 if (GET_CODE (x) == MEM)
4555 /* Need to go through general registers to get to other class regs. */
4556 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4559 /* This can happen when a paradoxical subreg is an operand to the
4561 /* ??? This shouldn't be necessary after instruction scheduling is
4562 enabled, because paradoxical subregs are not accepted by
4563 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4564 stop the paradoxical subreg stupidity in the *_operand functions
4566 if (GET_CODE (x) == MEM
4567 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4568 || GET_MODE (x) == QImode))
4571 /* This can happen because of the ior/and/etc patterns that accept FP
4572 registers as operands. If the third operand is a constant, then it
4573 needs to be reloaded into a FP register. */
4574 if (GET_CODE (x) == CONST_INT)
4577 /* This can happen because of register elimination in a muldi3 insn.
4578 E.g. `26107 * (unsigned long)&u'. */
4579 if (GET_CODE (x) == PLUS)
4584 /* ??? This happens if we cse/gcse a BImode value across a call,
4585 and the function has a nonlocal goto. This is because global
4586 does not allocate call crossing pseudos to hard registers when
4587 current_function_has_nonlocal_goto is true. This is relatively
4588 common for C++ programs that use exceptions. To reproduce,
4589 return NO_REGS and compile libstdc++. */
4590 if (GET_CODE (x) == MEM)
4593 /* This can happen when we take a BImode subreg of a DImode value,
4594 and that DImode value winds up in some non-GR register. */
4595 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4607 /* Emit text to declare externally defined variables and functions, because
4608 the Intel assembler does not support undefined externals. */
4611 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4613 int save_referenced;
4615 /* GNU as does not need anything here, but the HP linker does need
4616 something for external functions. */
4620 || TREE_CODE (decl) != FUNCTION_DECL
4621 || strstr (name, "__builtin_") == name))
4624 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4625 the linker when we do this, so we need to be careful not to do this for
4626 builtin functions which have no library equivalent. Unfortunately, we
4627 can't tell here whether or not a function will actually be called by
4628 expand_expr, so we pull in library functions even if we may not need
4630 if (! strcmp (name, "__builtin_next_arg")
4631 || ! strcmp (name, "alloca")
4632 || ! strcmp (name, "__builtin_constant_p")
4633 || ! strcmp (name, "__builtin_args_info"))
4637 ia64_hpux_add_extern_decl (decl);
4640 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4642 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4643 if (TREE_CODE (decl) == FUNCTION_DECL)
4644 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4645 (*targetm.asm_out.globalize_label) (file, name);
4646 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4650 /* Parse the -mfixed-range= option string. */
4653 fix_range (const char *const_str)
4656 char *str, *dash, *comma;
4658 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4659 REG2 are either register names or register numbers. The effect
4660 of this option is to mark the registers in the range from REG1 to
4661 REG2 as ``fixed'' so they won't be used by the compiler. This is
4662 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4664 i = strlen (const_str);
4665 str = (char *) alloca (i + 1);
4666 memcpy (str, const_str, i + 1);
4670 dash = strchr (str, '-');
4673 warning ("value of -mfixed-range must have form REG1-REG2");
4678 comma = strchr (dash + 1, ',');
4682 first = decode_reg_name (str);
4685 warning ("unknown register name: %s", str);
4689 last = decode_reg_name (dash + 1);
4692 warning ("unknown register name: %s", dash + 1);
4700 warning ("%s-%s is an empty range", str, dash + 1);
4704 for (i = first; i <= last; ++i)
4705 fixed_regs[i] = call_used_regs[i] = 1;
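/* Illustrative usage: a kernel build might pass

       -mfixed-range=f32-f127

   so that none of f32..f127 is ever allocated; several ranges can be
   given with commas, e.g. -mfixed-range=f12-f15,f32-f127.  */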
4715 static struct machine_function *
4716 ia64_init_machine_status (void)
4718 return ggc_alloc_cleared (sizeof (struct machine_function));
4721 /* Handle TARGET_OPTIONS switches. */
4724 ia64_override_options (void)
4728 const char *const name; /* processor name or nickname. */
4729 const enum processor_type processor;
4731 const processor_alias_table[] =
4733 {"itanium", PROCESSOR_ITANIUM},
4734 {"itanium1", PROCESSOR_ITANIUM},
4735 {"merced", PROCESSOR_ITANIUM},
4736 {"itanium2", PROCESSOR_ITANIUM2},
4737 {"mckinley", PROCESSOR_ITANIUM2},
4740 int const pta_size = ARRAY_SIZE (processor_alias_table);
4743 if (TARGET_AUTO_PIC)
4744 target_flags |= MASK_CONST_GP;
4746 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4748 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4749 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4751 warning ("cannot optimize floating point division for both latency and throughput");
4752 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4756 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4757 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4759 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4763 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4765 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4766 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4768 warning ("cannot optimize integer division for both latency and throughput");
4769 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4773 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4774 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4776 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4780 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4782 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4783 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4785 warning ("cannot optimize square root for both latency and throughput");
4786 target_flags &= ~MASK_INLINE_SQRT_THR;
4790 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4791 target_flags &= ~MASK_INLINE_SQRT_LAT;
4793 target_flags &= ~MASK_INLINE_SQRT_THR;
4797 if (TARGET_INLINE_SQRT_LAT)
4799 warning ("not yet implemented: latency-optimized inline square root");
4800 target_flags &= ~MASK_INLINE_SQRT_LAT;
4803 if (ia64_fixed_range_string)
4804 fix_range (ia64_fixed_range_string);
4806 if (ia64_tls_size_string)
4809 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4810 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4811 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4813 ia64_tls_size = tmp;
4816 if (!ia64_tune_string)
4817 ia64_tune_string = "itanium2";
4819 for (i = 0; i < pta_size; i++)
4820 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4822 ia64_tune = processor_alias_table[i].processor;
4827 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4829 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4830 flag_schedule_insns_after_reload = 0;
4832 /* Variable tracking should be run after all optimizations which change
4833 the order of insns. It also needs a valid CFG. */
4834 ia64_flag_var_tracking = flag_var_tracking;
4835 flag_var_tracking = 0;
4837 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4839 init_machine_status = ia64_init_machine_status;
4842 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4843 static enum attr_type ia64_safe_type (rtx);
4845 static enum attr_itanium_class
4846 ia64_safe_itanium_class (rtx insn)
4848 if (recog_memoized (insn) >= 0)
4849 return get_attr_itanium_class (insn);
4851 return ITANIUM_CLASS_UNKNOWN;
4854 static enum attr_type
4855 ia64_safe_type (rtx insn)
4857 if (recog_memoized (insn) >= 0)
4858 return get_attr_type (insn);
4860 return TYPE_UNKNOWN;
4863 /* The following collection of routines emit instruction group stop bits as
4864 necessary to avoid dependencies. */
4866 /* Need to track some additional registers as far as serialization is
4867 concerned so we can properly handle br.call and br.ret. We could
4868 make these registers visible to gcc, but since these registers are
4869 never explicitly used in gcc generated code, it seems wasteful to
4870 do so (plus it would make the call and return patterns needlessly
4872 #define REG_RP (BR_REG (0))
4873 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4874 /* This is used for volatile asms which may require a stop bit immediately
4875 before and after them. */
4876 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4877 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4878 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4880 /* For each register, we keep track of how it has been written in the
4881 current instruction group.
4883 If a register is written unconditionally (no qualifying predicate),
4884 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4886 If a register is written if its qualifying predicate P is true, we
4887 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4888 may be written again by the complement of P (P^1) and when this happens,
4889 WRITE_COUNT gets set to 2.
4891 The result of this is that whenever an insn attempts to write a register
4892 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4894 If a predicate register is written by a floating-point insn, we set
4895 WRITTEN_BY_FP to true.
4897 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4898 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
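/* Illustration (assuming p6/p7 were set by one compare and so form a
   complementary (P, P^1) pair):

       cmp.eq p6, p7 = r8, r9 ;;
       (p6) mov r10 = 1    // write_count (r10) = 1, first_pred = p6
       (p7) mov r10 = 2    // complement of p6: allowed, write_count = 2

   Any further write to r10 in this group, or a write under a predicate
   that is not p6's complement, requires a stop bit first.  */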
4900 struct reg_write_state
4902 unsigned int write_count : 2;
4903 unsigned int first_pred : 16;
4904 unsigned int written_by_fp : 1;
4905 unsigned int written_by_and : 1;
4906 unsigned int written_by_or : 1;
4909 /* Cumulative info for the current instruction group. */
4910 struct reg_write_state rws_sum[NUM_REGS];
4911 /* Info for the current instruction. This gets copied to rws_sum after a
4912 stop bit is emitted. */
4913 struct reg_write_state rws_insn[NUM_REGS];
4915 /* Indicates whether this is the first instruction after a stop bit,
4916 in which case we don't need another stop bit. Without this, we hit
4917 the abort in ia64_variable_issue when scheduling an alloc. */
4918 static int first_instruction;
4920 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4921 RTL for one instruction. */
4924 unsigned int is_write : 1; /* Is register being written? */
4925 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4926 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4927 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4928 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4929 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4932 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4933 static int rws_access_regno (int, struct reg_flags, int);
4934 static int rws_access_reg (rtx, struct reg_flags, int);
4935 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4936 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4937 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4938 static void init_insn_group_barriers (void);
4939 static int group_barrier_needed_p (rtx);
4940 static int safe_group_barrier_needed_p (rtx);
4942 /* Update *RWS for REGNO, which is being written by the current instruction,
4943 with predicate PRED, and associated register flags in FLAGS. */
4946 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4949 rws[regno].write_count++;
4951 rws[regno].write_count = 2;
4952 rws[regno].written_by_fp |= flags.is_fp;
4953 /* ??? Not tracking and/or across differing predicates. */
4954 rws[regno].written_by_and = flags.is_and;
4955 rws[regno].written_by_or = flags.is_or;
4956 rws[regno].first_pred = pred;
4959 /* Handle an access to register REGNO of type FLAGS using predicate register
4960 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4961 a dependency with an earlier instruction in the same group. */
4964 rws_access_regno (int regno, struct reg_flags flags, int pred)
4966 int need_barrier = 0;
4968 if (regno >= NUM_REGS)
4971 if (! PR_REGNO_P (regno))
4972 flags.is_and = flags.is_or = 0;
4978 /* Does one insn write the same reg multiple times? */
4979 if (rws_insn[regno].write_count > 0)
4982 /* Update info for current instruction. */
4983 rws_update (rws_insn, regno, flags, pred);
4984 write_count = rws_sum[regno].write_count;
4986 switch (write_count)
4989 /* The register has not been written yet. */
4990 rws_update (rws_sum, regno, flags, pred);
4994 /* The register has been written via a predicate. If this is
4995 not a complementary predicate, then we need a barrier. */
4996 /* ??? This assumes that P and P+1 are always complementary
4997 predicates for P even. */
4998 if (flags.is_and && rws_sum[regno].written_by_and)
5000 else if (flags.is_or && rws_sum[regno].written_by_or)
5002 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5004 rws_update (rws_sum, regno, flags, pred);
5008 /* The register has been unconditionally written already. We
5010 if (flags.is_and && rws_sum[regno].written_by_and)
5012 else if (flags.is_or && rws_sum[regno].written_by_or)
5016 rws_sum[regno].written_by_and = flags.is_and;
5017 rws_sum[regno].written_by_or = flags.is_or;
5026 if (flags.is_branch)
5028 /* Branches have several RAW exceptions that allow us to avoid
5031 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5032 /* RAW dependencies on branch regs are permissible as long
5033 as the writer is a non-branch instruction. Since we
5034 never generate code that uses a branch register written
5035 by a branch instruction, handling this case is
5039 if (REGNO_REG_CLASS (regno) == PR_REGS
5040 && ! rws_sum[regno].written_by_fp)
5041 /* The predicates of a branch are available within the
5042 same insn group as long as the predicate was written by
5043 something other than a floating-point instruction. */
5047 if (flags.is_and && rws_sum[regno].written_by_and)
5049 if (flags.is_or && rws_sum[regno].written_by_or)
5052 switch (rws_sum[regno].write_count)
5055 /* The register has not been written yet. */
5059 /* The register has been written via a predicate. If this is
5060 not a complementary predicate, then we need a barrier. */
5061 /* ??? This assumes that P and P+1 are always complementary
5062 predicates for P even. */
5063 if ((rws_sum[regno].first_pred ^ 1) != pred)
5068 /* The register has been unconditionally written already. We
5078 return need_barrier;
5082 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5084 int regno = REGNO (reg);
5085 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5088 return rws_access_regno (regno, flags, pred);
5091 int need_barrier = 0;
5093 need_barrier |= rws_access_regno (regno + n, flags, pred);
5094 return need_barrier;
5098 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5099 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5102 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
5104 rtx src = SET_SRC (x);
5108 switch (GET_CODE (src))
5114 if (SET_DEST (x) == pc_rtx)
5115 /* X is a conditional branch. */
5119 int is_complemented = 0;
5121 /* X is a conditional move. */
5122 rtx cond = XEXP (src, 0);
5123 if (GET_CODE (cond) == EQ)
5124 is_complemented = 1;
5125 cond = XEXP (cond, 0);
5126 if (GET_CODE (cond) != REG
5127 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5130 if (XEXP (src, 1) == SET_DEST (x)
5131 || XEXP (src, 2) == SET_DEST (x))
5133 /* X is a conditional move that conditionally writes the
5136 /* We need another complement in this case. */
5137 if (XEXP (src, 1) == SET_DEST (x))
5138 is_complemented = ! is_complemented;
5140 *ppred = REGNO (cond);
5141 if (is_complemented)
5145 /* ??? If this is a conditional write to the dest, then this
5146 instruction does not actually read one source. This probably
5147 doesn't matter, because that source is also the dest. */
5148 /* ??? Multiple writes to predicate registers are allowed
5149 if they are all AND type compares, or if they are all OR
5150 type compares. We do not generate such instructions
5153 /* ... fall through ... */
5156 if (COMPARISON_P (src)
5157 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5158 /* Set pflags->is_fp to 1 so that we know we're dealing
5159 with a floating point comparison when processing the
5160 destination of the SET. */
5163 /* Discover if this is a parallel comparison. We only handle
5164 and.orcm and or.andcm at present, since we must retain a
5165 strict inverse on the predicate pair. */
5166 else if (GET_CODE (src) == AND)
5168 else if (GET_CODE (src) == IOR)
5175 /* Subroutine of rtx_needs_barrier; this function determines whether the
5176 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5177 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5181 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5183 int need_barrier = 0;
5185 rtx src = SET_SRC (x);
5187 if (GET_CODE (src) == CALL)
5188 /* We don't need to worry about the result registers that
5189 get written by a subroutine call. */
5190 return rtx_needs_barrier (src, flags, pred);
5191 else if (SET_DEST (x) == pc_rtx)
5193 /* X is a conditional branch. */
5194 /* ??? This seems redundant, as the caller sets this bit for
5196 flags.is_branch = 1;
5197 return rtx_needs_barrier (src, flags, pred);
5200 need_barrier = rtx_needs_barrier (src, flags, pred);
5202 /* This instruction unconditionally uses a predicate register. */
5204 need_barrier |= rws_access_reg (cond, flags, 0);
5207 if (GET_CODE (dst) == ZERO_EXTRACT)
5209 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5210 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5211 dst = XEXP (dst, 0);
5213 return need_barrier;
5216 /* Handle an access to rtx X of type FLAGS using predicate register
5217 PRED. Return 1 if this access creates a dependency with an earlier
5218 instruction in the same group. */
5221 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5224 int is_complemented = 0;
5225 int need_barrier = 0;
5226 const char *format_ptr;
5227 struct reg_flags new_flags;
5235 switch (GET_CODE (x))
5238 update_set_flags (x, &new_flags, &pred, &cond);
5239 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5240 if (GET_CODE (SET_SRC (x)) != CALL)
5242 new_flags.is_write = 1;
5243 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5248 new_flags.is_write = 0;
5249 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5251 /* Avoid multiple register writes, in case this is a pattern with
5252 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5253 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5255 new_flags.is_write = 1;
5256 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5257 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5258 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5263 /* X is a predicated instruction. */
5265 cond = COND_EXEC_TEST (x);
5268 need_barrier = rtx_needs_barrier (cond, flags, 0);
5270 if (GET_CODE (cond) == EQ)
5271 is_complemented = 1;
5272 cond = XEXP (cond, 0);
5273 if (GET_CODE (cond) != REG
5274 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5276 pred = REGNO (cond);
5277 if (is_complemented)
5280 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5281 return need_barrier;
5285 /* Clobber & use are for earlier compiler-phases only. */
5290 /* We always emit stop bits for traditional asms. We emit stop bits
5291 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5292 if (GET_CODE (x) != ASM_OPERANDS
5293 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5295 /* Avoid writing the register multiple times if we have multiple
5296 asm outputs. This avoids an abort in rws_access_reg. */
5297 if (! rws_insn[REG_VOLATILE].write_count)
5299 new_flags.is_write = 1;
5300 rws_access_regno (REG_VOLATILE, new_flags, pred);
5305 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5306 We cannot just fall through here, since then we would be confused
5307 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5308 a traditional asm, unlike its normal usage. */
5310 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5311 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5316 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5318 rtx pat = XVECEXP (x, 0, i);
5319 if (GET_CODE (pat) == SET)
5321 update_set_flags (pat, &new_flags, &pred, &cond);
5322 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5324 else if (GET_CODE (pat) == USE
5325 || GET_CODE (pat) == CALL
5326 || GET_CODE (pat) == ASM_OPERANDS)
5327 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5328 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5331 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5333 rtx pat = XVECEXP (x, 0, i);
5334 if (GET_CODE (pat) == SET)
5336 if (GET_CODE (SET_SRC (pat)) != CALL)
5338 new_flags.is_write = 1;
5339 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5343 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5344 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5352 if (REGNO (x) == AR_UNAT_REGNUM)
5354 for (i = 0; i < 64; ++i)
5355 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5358 need_barrier = rws_access_reg (x, flags, pred);
5362 /* Find the regs used in memory address computation. */
5363 new_flags.is_write = 0;
5364 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5367 case CONST_INT: case CONST_DOUBLE:
5368 case SYMBOL_REF: case LABEL_REF: case CONST:
5371 /* Operators with side-effects. */
5372 case POST_INC: case POST_DEC:
5373 if (GET_CODE (XEXP (x, 0)) != REG)
5376 new_flags.is_write = 0;
5377 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5378 new_flags.is_write = 1;
5379 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5383 if (GET_CODE (XEXP (x, 0)) != REG)
5386 new_flags.is_write = 0;
5387 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5388 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5389 new_flags.is_write = 1;
5390 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5393 /* Handle common unary and binary ops for efficiency. */
5394 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5395 case MOD: case UDIV: case UMOD: case AND: case IOR:
5396 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5397 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5398 case NE: case EQ: case GE: case GT: case LE:
5399 case LT: case GEU: case GTU: case LEU: case LTU:
5400 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5401 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5404 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5405 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5406 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5407 case SQRT: case FFS: case POPCOUNT:
5408 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5412 switch (XINT (x, 1))
5414 case UNSPEC_LTOFF_DTPMOD:
5415 case UNSPEC_LTOFF_DTPREL:
5417 case UNSPEC_LTOFF_TPREL:
5419 case UNSPEC_PRED_REL_MUTEX:
5420 case UNSPEC_PIC_CALL:
5422 case UNSPEC_FETCHADD_ACQ:
5423 case UNSPEC_BSP_VALUE:
5424 case UNSPEC_FLUSHRS:
5425 case UNSPEC_BUNDLE_SELECTOR:
5428 case UNSPEC_GR_SPILL:
5429 case UNSPEC_GR_RESTORE:
5431 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5432 HOST_WIDE_INT bit = (offset >> 3) & 63;
5434 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5435 new_flags.is_write = (XINT (x, 1) == 1);
5436 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5441 case UNSPEC_FR_SPILL:
5442 case UNSPEC_FR_RESTORE:
5443 case UNSPEC_GETF_EXP:
5444 case UNSPEC_SETF_EXP:
5446 case UNSPEC_FR_SQRT_RECIP_APPROX:
5447 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5450 case UNSPEC_FR_RECIP_APPROX:
5451 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5452 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5455 case UNSPEC_CMPXCHG_ACQ:
5456 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5457 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5465 case UNSPEC_VOLATILE:
5466 switch (XINT (x, 1))
5469 /* Alloc must always be the first instruction of a group.
5470 We force this by always returning true. */
5471 /* ??? We might get better scheduling if we explicitly check for
5472 input/local/output register dependencies, and modify the
5473 scheduler so that alloc is always reordered to the start of
5474 the current group. We could then eliminate all of the
5475 first_instruction code. */
5476 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5478 new_flags.is_write = 1;
5479 rws_access_regno (REG_AR_CFM, new_flags, pred);
5482 case UNSPECV_SET_BSP:
5486 case UNSPECV_BLOCKAGE:
5487 case UNSPECV_INSN_GROUP_BARRIER:
5489 case UNSPECV_PSAC_ALL:
5490 case UNSPECV_PSAC_NORMAL:
5499 new_flags.is_write = 0;
5500 need_barrier = rws_access_regno (REG_RP, flags, pred);
5501 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5503 new_flags.is_write = 1;
5504 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5505 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5509 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5510 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5511 switch (format_ptr[i])
5513 case '0': /* unused field */
5514 case 'i': /* integer */
5515 case 'n': /* note */
5516 case 'w': /* wide integer */
5517 case 's': /* pointer to string */
5518 case 'S': /* optional pointer to string */
5522 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5527 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5528 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5537 return need_barrier;
5540 /* Clear out the state for group_barrier_needed_p at the start of a
5541 sequence of insns. */
5544 init_insn_group_barriers (void)
5546 memset (rws_sum, 0, sizeof (rws_sum));
5547 first_instruction = 1;
5550 /* Given the current state, recorded by previous calls to this function,
5551 determine whether a group barrier (a stop bit) is necessary before INSN.
5552 Return nonzero if so. */
5555 group_barrier_needed_p (rtx insn)
5558 int need_barrier = 0;
5559 struct reg_flags flags;
5561 memset (&flags, 0, sizeof (flags));
5562 switch (GET_CODE (insn))
5568 /* A barrier doesn't imply an instruction group boundary. */
5572 memset (rws_insn, 0, sizeof (rws_insn));
5576 flags.is_branch = 1;
5577 flags.is_sibcall = SIBLING_CALL_P (insn);
5578 memset (rws_insn, 0, sizeof (rws_insn));
5580 /* Don't bundle a call following another call. */
5581 if ((pat = prev_active_insn (insn))
5582 && GET_CODE (pat) == CALL_INSN)
5588 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5592 flags.is_branch = 1;
5594 /* Don't bundle a jump following a call. */
5595 if ((pat = prev_active_insn (insn))
5596 && GET_CODE (pat) == CALL_INSN)
5604 if (GET_CODE (PATTERN (insn)) == USE
5605 || GET_CODE (PATTERN (insn)) == CLOBBER)
5606 /* Don't care about USE and CLOBBER "insns"---those are used to
5607 indicate to the optimizer that it shouldn't get rid of
5608 certain operations. */
5611 pat = PATTERN (insn);
5613 /* Ug. Hack hacks hacked elsewhere. */
5614 switch (recog_memoized (insn))
5616 /* We play dependency tricks with the epilogue in order
5617 to get proper schedules. Undo this for dv analysis. */
5618 case CODE_FOR_epilogue_deallocate_stack:
5619 case CODE_FOR_prologue_allocate_stack:
5620 pat = XVECEXP (pat, 0, 0);
5623 /* The pattern we use for br.cloop confuses the code above.
5624 The second element of the vector is representative. */
5625 case CODE_FOR_doloop_end_internal:
5626 pat = XVECEXP (pat, 0, 1);
5629 /* Doesn't generate code. */
5630 case CODE_FOR_pred_rel_mutex:
5631 case CODE_FOR_prologue_use:
5638 memset (rws_insn, 0, sizeof (rws_insn));
5639 need_barrier = rtx_needs_barrier (pat, flags, 0);
5641 /* Check to see if the previous instruction was a volatile
5644 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5651 if (first_instruction && INSN_P (insn)
5652 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5653 && GET_CODE (PATTERN (insn)) != USE
5654 && GET_CODE (PATTERN (insn)) != CLOBBER)
5657 first_instruction = 0;
5660 return need_barrier;
5663 /* Like group_barrier_needed_p, but do not clobber the current state. */
5666 safe_group_barrier_needed_p (rtx insn)
5668 struct reg_write_state rws_saved[NUM_REGS];
5669 int saved_first_instruction;
5672 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5673 saved_first_instruction = first_instruction;
5675 t = group_barrier_needed_p (insn);
5677 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5678 first_instruction = saved_first_instruction;
5683 /* Scan the current function and insert stop bits as necessary to
5684 eliminate dependencies. This function assumes that a final
5685 instruction scheduling pass has been run which has already
5686 inserted most of the necessary stop bits. This function only
5687 inserts new ones at basic block boundaries, since these are
5688 invisible to the scheduler. */
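/* Illustration: in the emitted assembly a stop bit appears as the ";;"
   marker, e.g.

       add r8 = r9, r10 ;;
       ld8 r11 = [r8]

   guarantees that the load sees the add's result; the barriers inserted
   by these routines become such markers.  */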
5691 emit_insn_group_barriers (FILE *dump)
5695 int insns_since_last_label = 0;
5697 init_insn_group_barriers ();
5699 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5701 if (GET_CODE (insn) == CODE_LABEL)
5703 if (insns_since_last_label)
5705 insns_since_last_label = 0;
5707 else if (GET_CODE (insn) == NOTE
5708 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5710 if (insns_since_last_label)
5712 insns_since_last_label = 0;
5714 else if (GET_CODE (insn) == INSN
5715 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5716 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5718 init_insn_group_barriers ();
5721 else if (INSN_P (insn))
5723 insns_since_last_label = 1;
5725 if (group_barrier_needed_p (insn))
5730 fprintf (dump, "Emitting stop before label %d\n",
5731 INSN_UID (last_label));
5732 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5735 init_insn_group_barriers ();
5743 /* Like emit_insn_group_barriers, but used when no final scheduling pass was run.
5744 This function has to emit all necessary group barriers. */
5747 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5751 init_insn_group_barriers ();
5753 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5755 if (GET_CODE (insn) == BARRIER)
5757 rtx last = prev_active_insn (insn);
5761 if (GET_CODE (last) == JUMP_INSN
5762 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5763 last = prev_active_insn (last);
5764 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5765 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5767 init_insn_group_barriers ();
5769 else if (INSN_P (insn))
5771 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5772 init_insn_group_barriers ();
5773 else if (group_barrier_needed_p (insn))
5775 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5776 init_insn_group_barriers ();
5777 group_barrier_needed_p (insn);
5784 static int errata_find_address_regs (rtx *, void *);
5785 static void errata_emit_nops (rtx);
5786 static void fixup_errata (void);
5788 /* This structure is used to track some details about the previous insn
5789 groups so we can determine if it may be necessary to insert NOPs to
5790 work around hardware errata. */
5793 HARD_REG_SET p_reg_set;
5794 HARD_REG_SET gr_reg_conditionally_set;
5797 /* Index into the last_group array. */
5798 static int group_idx;
5800 /* Called through for_each_rtx; determines if a hard register that was
5801 conditionally set in the previous group is used as an address register.
5802 It ensures that for_each_rtx returns 1 in that case. */
5804 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5807 if (GET_CODE (x) != MEM)
5810 if (GET_CODE (x) == POST_MODIFY)
5812 if (GET_CODE (x) == REG)
5814 struct group *prev_group = last_group + (group_idx ^ 1);
5815 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5823 /* Called for each insn; this function keeps track of the state in
5824 last_group and emits additional NOPs if necessary to work around
5825 an Itanium A/B step erratum. */
5827 errata_emit_nops (rtx insn)
5829 struct group *this_group = last_group + group_idx;
5830 struct group *prev_group = last_group + (group_idx ^ 1);
5831 rtx pat = PATTERN (insn);
5832 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5833 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5834 enum attr_type type;
5837 if (GET_CODE (real_pat) == USE
5838 || GET_CODE (real_pat) == CLOBBER
5839 || GET_CODE (real_pat) == ASM_INPUT
5840 || GET_CODE (real_pat) == ADDR_VEC
5841 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5842 || asm_noperands (PATTERN (insn)) >= 0)
5845 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5848 if (GET_CODE (set) == PARALLEL)
5851 set = XVECEXP (real_pat, 0, 0);
5852 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5853 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5854 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5861 if (set && GET_CODE (set) != SET)
5864 type = get_attr_type (insn);
5867 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5868 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5870 if ((type == TYPE_M || type == TYPE_A) && cond && set
5871 && REG_P (SET_DEST (set))
5872 && GET_CODE (SET_SRC (set)) != PLUS
5873 && GET_CODE (SET_SRC (set)) != MINUS
5874 && (GET_CODE (SET_SRC (set)) != ASHIFT
5875 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5876 && (GET_CODE (SET_SRC (set)) != MEM
5877 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5878 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5880 if (!COMPARISON_P (cond)
5881 || !REG_P (XEXP (cond, 0)))
5884 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5885 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5887 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5889 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5890 emit_insn_before (gen_nop (), insn);
5891 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5893 memset (last_group, 0, sizeof last_group);
5897 /* Emit extra nops if they are required to work around hardware errata. */
5904 if (! TARGET_B_STEP)
5908 memset (last_group, 0, sizeof last_group);
5910 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5915 if (ia64_safe_type (insn) == TYPE_S)
5918 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5921 errata_emit_nops (insn);
5926 /* Instruction scheduling support. */
5928 #define NR_BUNDLES 10
5930 /* A list of names of all available bundles. */
5932 static const char *bundle_name [NR_BUNDLES] =
5938 #if NR_BUNDLES == 10
5948 /* Nonzero if we should insert stop bits into the schedule. */
5950 int ia64_final_schedule = 0;
5952 /* Codes of the corresponding queried units: */
5954 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5955 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5957 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5958 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5960 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5962 /* The following variable value is an insn group barrier. */
5964 static rtx dfa_stop_insn;
5966 /* The following variable value is the last issued insn. */
5968 static rtx last_scheduled_insn;
5970 /* The following variable value is the size of the DFA state. */
5972 static size_t dfa_state_size;
5974 /* The following variable value is a pointer to a DFA state used as
5975 a temporary variable. */
5977 static state_t temp_dfa_state = NULL;
5979 /* The following variable value is the DFA state after issuing the last
5982 static state_t prev_cycle_state = NULL;
5984 /* The following array element values are TRUE if the corresponding
5985 insn requires stop bits to be added before it. */
5987 static char *stops_p;
5989 /* The following variable is used to set up the array mentioned above. */
5991 static int stop_before_p = 0;
5993 /* The following variable value is the length of the arrays `clocks' and
5996 static int clocks_length;
5998 /* The following array element values are cycles on which the
5999 corresponding insn will be issued. The array is used only for
6004 /* The following array element values are the numbers of cycles that should
6005 be added to improve insn scheduling for MM_insns on Itanium1. */
6007 static int *add_cycles;
6009 static rtx ia64_single_set (rtx);
6010 static void ia64_emit_insn_before (rtx, rtx);
6012 /* Map a bundle number to its pseudo-op. */
6015 get_bundle_name (int b)
6017 return bundle_name[b];
6021 /* Return the maximum number of instructions a cpu can issue. */
6024 ia64_issue_rate (void)
6029 /* Helper function - like single_set, but look inside COND_EXEC. */
6032 ia64_single_set (rtx insn)
6034 rtx x = PATTERN (insn), ret;
6035 if (GET_CODE (x) == COND_EXEC)
6036 x = COND_EXEC_CODE (x);
6037 if (GET_CODE (x) == SET)
6040 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6041 Although they are not classical single sets, the second set is there just
6042 to protect it from being moved past FP-relative stack accesses. */
6043 switch (recog_memoized (insn))
6045 case CODE_FOR_prologue_allocate_stack:
6046 case CODE_FOR_epilogue_deallocate_stack:
6047 ret = XVECEXP (x, 0, 0);
6051 ret = single_set_2 (insn, x);
6058 /* Adjust the cost of a scheduling dependency. Return the new cost of
6059 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6062 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6064 enum attr_itanium_class dep_class;
6065 enum attr_itanium_class insn_class;
6067 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6070 insn_class = ia64_safe_itanium_class (insn);
6071 dep_class = ia64_safe_itanium_class (dep_insn);
6072 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6073 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6079 /* Like emit_insn_before, but skip cycle_display notes.
6080 ??? When cycle display notes are implemented, update this. */
6083 ia64_emit_insn_before (rtx insn, rtx before)
6085 emit_insn_before (insn, before);
6088 /* The following function marks insns that produce addresses for load
6089 and store insns. Such insns will be placed into M slots because this
6090 decreases latency time for Itanium1 (see function
6091 `ia64_produce_address_p' and the DFA descriptions). */
6094 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6096 rtx insn, link, next, next_tail;
6098 next_tail = NEXT_INSN (tail);
6099 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6102 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6104 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6106 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6108 next = XEXP (link, 0);
6109 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6110 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6111 && ia64_st_address_bypass_p (insn, next))
6113 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6114 || ia64_safe_itanium_class (next)
6115 == ITANIUM_CLASS_FLD)
6116 && ia64_ld_address_bypass_p (insn, next))
6119 insn->call = link != 0;
6123 /* We're beginning a new block. Initialize data structures as necessary. */
6126 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6127 int sched_verbose ATTRIBUTE_UNUSED,
6128 int max_ready ATTRIBUTE_UNUSED)
6130 #ifdef ENABLE_CHECKING
6133 if (reload_completed)
6134 for (insn = NEXT_INSN (current_sched_info->prev_head);
6135 insn != current_sched_info->next_tail;
6136 insn = NEXT_INSN (insn))
6137 if (SCHED_GROUP_P (insn))
6140 last_scheduled_insn = NULL_RTX;
6141 init_insn_group_barriers ();
6144 /* We are about to begin issuing insns for this clock cycle.
6145 Override the default sort algorithm to better slot instructions. */
6148 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6149 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6153 int n_ready = *pn_ready;
6154 rtx *e_ready = ready + n_ready;
6158 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6160 if (reorder_type == 0)
6162 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6164 for (insnp = ready; insnp < e_ready; insnp++)
6165 if (insnp < e_ready)
6168 enum attr_type t = ia64_safe_type (insn);
6169 if (t == TYPE_UNKNOWN)
6171 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6172 || asm_noperands (PATTERN (insn)) >= 0)
6174 rtx lowest = ready[n_asms];
6175 ready[n_asms] = insn;
6181 rtx highest = ready[n_ready - 1];
6182 ready[n_ready - 1] = insn;
6189 if (n_asms < n_ready)
6191 /* Some normal insns to process. Skip the asms. */
6195 else if (n_ready > 0)
6199 if (ia64_final_schedule)
6202 int nr_need_stop = 0;
6204 for (insnp = ready; insnp < e_ready; insnp++)
6205 if (safe_group_barrier_needed_p (*insnp))
6208 if (reorder_type == 1 && n_ready == nr_need_stop)
6210 if (reorder_type == 0)
6213 /* Move down everything that needs a stop bit, preserving
6215 while (insnp-- > ready + deleted)
6216 while (insnp >= ready + deleted)
6219 if (! safe_group_barrier_needed_p (insn))
6221 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6232 /* We are about to begin issuing insns for this clock cycle. Override
6233 the default sort algorithm to better slot instructions. */
6236 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6239 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6240 pn_ready, clock_var, 0);
6243 /* Like ia64_sched_reorder, but called after issuing each insn.
6244 Override the default sort algorithm to better slot instructions. */
6247 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6248 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6249 int *pn_ready, int clock_var)
6251 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6252 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6253 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6257 /* We are about to issue INSN. Return the number of insns left on the
6258 ready queue that can be issued this cycle. */
6261 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6262 int sched_verbose ATTRIBUTE_UNUSED,
6263 rtx insn ATTRIBUTE_UNUSED,
6264 int can_issue_more ATTRIBUTE_UNUSED)
6266 last_scheduled_insn = insn;
6267 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6268 if (reload_completed)
6270 if (group_barrier_needed_p (insn))
6272 if (GET_CODE (insn) == CALL_INSN)
6273 init_insn_group_barriers ();
6274 stops_p [INSN_UID (insn)] = stop_before_p;
6280 /* We are choosing an insn from the ready queue. Return nonzero if INSN
6284 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6286 if (insn == NULL_RTX || !INSN_P (insn))
6288 return (!reload_completed
6289 || !safe_group_barrier_needed_p (insn));
6292 /* The following variable value is a pseudo-insn used by the DFA insn
6293 scheduler to change the DFA state when the simulated clock is
6296 static rtx dfa_pre_cycle_insn;
6298 /* We are about to begin issuing INSN. Return nonzero if we cannot
6299 issue it on the given cycle CLOCK, and return zero if we should not sort
6300 the ready queue on the next clock start. */
6303 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6304 int clock, int *sort_p)
6306 int setup_clocks_p = FALSE;
6308 if (insn == NULL_RTX || !INSN_P (insn))
6310 if ((reload_completed && safe_group_barrier_needed_p (insn))
6311 || (last_scheduled_insn
6312 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6313 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6314 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6316 init_insn_group_barriers ();
6317 if (verbose && dump)
6318 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6319 last_clock == clock ? " + cycle advance" : "");
6321 if (last_clock == clock)
6323 state_transition (curr_state, dfa_stop_insn);
6324 if (TARGET_EARLY_STOP_BITS)
6325 *sort_p = (last_scheduled_insn == NULL_RTX
6326 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6331 else if (reload_completed)
6332 setup_clocks_p = TRUE;
6333 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6334 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6335 state_reset (curr_state);
6338 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6339 state_transition (curr_state, dfa_stop_insn);
6340 state_transition (curr_state, dfa_pre_cycle_insn);
6341 state_transition (curr_state, NULL);
6344 else if (reload_completed)
6345 setup_clocks_p = TRUE;
6346 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6347 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6348 && asm_noperands (PATTERN (insn)) < 0)
6350 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6352 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6357 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6358 if (REG_NOTE_KIND (link) == 0)
6360 enum attr_itanium_class dep_class;
6361 rtx dep_insn = XEXP (link, 0);
6363 dep_class = ia64_safe_itanium_class (dep_insn);
6364 if ((dep_class == ITANIUM_CLASS_MMMUL
6365 || dep_class == ITANIUM_CLASS_MMSHF)
6366 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6368 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6369 d = last_clock - clocks [INSN_UID (dep_insn)];
6372 add_cycles [INSN_UID (insn)] = 3 - d;
6380 /* The following page contains abstract data `bundle states' which are
6381 used for bundling insns (inserting nops and template generation). */
6383 /* The following describes the state of insn bundling. */
6387 /* Unique bundle state number to identify them in the debugging
6390 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6391 /* number of nops before and after the insn */
6392 short before_nops_num, after_nops_num;
6393 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6395 int cost; /* cost of the state in cycles */
6396 int accumulated_insns_num; /* number of all previous insns including
6397 nops; an L insn counts as 2 insns */
6398 int branch_deviation; /* deviation of previous branches from 3rd slots */
6399 struct bundle_state *next; /* next state with the same insn_num */
6400 struct bundle_state *originator; /* originator (previous insn state) */
6401 /* All bundle states are in the following chain. */
6402 struct bundle_state *allocated_states_chain;
6403 /* The DFA State after issuing the insn and the nops. */
6407 /* The following maps an insn number to the corresponding bundle state. */
6409 static struct bundle_state **index_to_bundle_states;
6411 /* The unique number of the next bundle state. */
6413 static int bundle_states_num;
6415 /* All allocated bundle states are in the following chain. */
6417 static struct bundle_state *allocated_bundle_states_chain;
6419 /* All allocated but not used bundle states are in the following
6422 static struct bundle_state *free_bundle_state_chain;
6425 /* The following function returns a free bundle state. */
6427 static struct bundle_state *
6428 get_free_bundle_state (void)
6430 struct bundle_state *result;
6432 if (free_bundle_state_chain != NULL)
6434 result = free_bundle_state_chain;
6435 free_bundle_state_chain = result->next;
6437   else
6439       result = xmalloc (sizeof (struct bundle_state));
6440 result->dfa_state = xmalloc (dfa_state_size);
6441 result->allocated_states_chain = allocated_bundle_states_chain;
6442 allocated_bundle_states_chain = result;
6444   result->unique_num = bundle_states_num++;
6445   return result;
6449 /* The following function frees the given bundle state.  */
6452 free_bundle_state (struct bundle_state *state)
6454 state->next = free_bundle_state_chain;
6455 free_bundle_state_chain = state;
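/* Editor's note: get_free_bundle_state/free_bundle_state above implement a
   simple free-list allocator.  A self-contained sketch of the same pattern,
   with illustrative names that are not part of this file:  */
#if 0
#include <stdlib.h>

struct node { struct node *next; };
static struct node *free_chain;

static struct node *
node_alloc (void)
{
  struct node *n = free_chain;
  if (n != NULL)
    free_chain = n->next;	/* reuse a previously freed node */
  else
    n = malloc (sizeof *n);	/* otherwise allocate a fresh one */
  return n;
}

static void
node_free (struct node *n)
{
  n->next = free_chain;		/* push back onto the free list */
  free_chain = n;
}
#endif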
6458 /* Start work with abstract data `bundle states'. */
6461 initiate_bundle_states (void)
6463 bundle_states_num = 0;
6464 free_bundle_state_chain = NULL;
6465 allocated_bundle_states_chain = NULL;
6468 /* Finish work with abstract data `bundle states'. */
6471 finish_bundle_states (void)
6473 struct bundle_state *curr_state, *next_state;
6475   for (curr_state = allocated_bundle_states_chain;
6476        curr_state != NULL;
6477        curr_state = next_state)
6479 next_state = curr_state->allocated_states_chain;
6480       free (curr_state->dfa_state);
6481       free (curr_state);
6485 /* Hash table of the bundle states. The key is dfa_state and insn_num
6486 of the bundle states. */
6488 static htab_t bundle_state_table;
6490 /* The function returns the hash of BUNDLE_STATE.  */
6493 bundle_state_hash (const void *bundle_state)
6495   const struct bundle_state *state = (struct bundle_state *) bundle_state;
6496   unsigned result, i;
6498   for (result = i = 0; i < dfa_state_size; i++)
6499 result += (((unsigned char *) state->dfa_state) [i]
6500 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6501 return result + state->insn_num;
6504 /* The function returns nonzero if the bundle state keys are equal. */
6507 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6509 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6510 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6512 return (state1->insn_num == state2->insn_num
6513 && memcmp (state1->dfa_state, state2->dfa_state,
6514 dfa_state_size) == 0);
6517 /* The function inserts the BUNDLE_STATE into the hash table.  The
6518    function returns nonzero if the bundle state has been inserted into
6519    the table.  The table keeps the best bundle state with a given key.  */
6522 insert_bundle_state (struct bundle_state *bundle_state)
6524   void **entry_ptr;
6526   entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6527 if (*entry_ptr == NULL)
6529 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6530 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6531       *entry_ptr = (void *) bundle_state;
6532       return TRUE;
6534 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6535 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6536 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6537 > bundle_state->accumulated_insns_num
6538 || (((struct bundle_state *)
6539 *entry_ptr)->accumulated_insns_num
6540 == bundle_state->accumulated_insns_num
6541 && ((struct bundle_state *)
6542 *entry_ptr)->branch_deviation
6543 > bundle_state->branch_deviation))))
6546 struct bundle_state temp;
6548 temp = *(struct bundle_state *) *entry_ptr;
6549 *(struct bundle_state *) *entry_ptr = *bundle_state;
6550 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6551       *bundle_state = temp;
6554   return FALSE;
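/* Editor's note: the else-if comparison in insert_bundle_state above is a
   lexicographic order on (cost, accumulated_insns_num, branch_deviation).
   An equivalent predicate, written out for clarity (illustrative only):  */
#if 0
static int
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  /* Smaller cost wins; ties go to fewer accumulated insns (i.e. fewer
     inserted nops), then to smaller branch deviation.  */
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  return a->branch_deviation < b->branch_deviation;
}
#endif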
6556 /* Start work with the hash table. */
6559 initiate_bundle_state_table (void)
6561   bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6562 				    (htab_del) 0);
6565 /* Finish work with the hash table. */
6568 finish_bundle_state_table (void)
6570 htab_delete (bundle_state_table);
6575 /* The following variable is an insn `nop' used to check bundle states
6576    with different numbers of inserted nops.  */
6578 static rtx ia64_nop;
6580 /* The following function tries to issue NOPS_NUM nops for the current
6581    state without advancing the processor cycle.  If it fails, the
6582    function returns FALSE and frees the current state.  */
6585 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6587   int i;
6589   for (i = 0; i < nops_num; i++)
6590     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6592 	free_bundle_state (curr_state);
6593 	return FALSE;
6595   return TRUE;
6598 /* The following function tries to issue INSN for the current
6599    state without advancing the processor cycle.  If it fails, the
6600    function returns FALSE and frees the current state.  */
6603 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6605   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6607       free_bundle_state (curr_state);
6608       return FALSE;
6610   return TRUE;
6613 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6614    starting with ORIGINATOR without advancing the processor cycle.  If
6615    TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6616    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
6617    bundle.  If successful, the function creates a new bundle state,
6618    inserting it into the hash table and into `index_to_bundle_states'.  */
6621 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6622 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6624 struct bundle_state *curr_state;
6626 curr_state = get_free_bundle_state ();
6627 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6628 curr_state->insn = insn;
6629 curr_state->insn_num = originator->insn_num + 1;
6630 curr_state->cost = originator->cost;
6631 curr_state->originator = originator;
6632 curr_state->before_nops_num = before_nops_num;
6633 curr_state->after_nops_num = 0;
6634 curr_state->accumulated_insns_num
6635 = originator->accumulated_insns_num + before_nops_num;
6636 curr_state->branch_deviation = originator->branch_deviation;
6637 if (insn == NULL_RTX)
6639 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6641 if (GET_MODE (insn) == TImode)
6643       if (!try_issue_nops (curr_state, before_nops_num))
6644 	return;
6645       if (!try_issue_insn (curr_state, insn))
6646 	return;
6647 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6648 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6649 && curr_state->accumulated_insns_num % 3 != 0)
6651 	  free_bundle_state (curr_state);
6652 	  return;
6655 else if (GET_MODE (insn) != TImode)
6657       if (!try_issue_nops (curr_state, before_nops_num))
6658 	return;
6659       if (!try_issue_insn (curr_state, insn))
6660 	return;
6661 curr_state->accumulated_insns_num++;
6662 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6663 || asm_noperands (PATTERN (insn)) >= 0)
6665 if (ia64_safe_type (insn) == TYPE_L)
6666 curr_state->accumulated_insns_num++;
6670 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6671       state_transition (curr_state->dfa_state, NULL);
6672       curr_state->cost++;
6673       if (!try_issue_nops (curr_state, before_nops_num))
6674 	return;
6675       if (!try_issue_insn (curr_state, insn))
6676 	return;
6677 curr_state->accumulated_insns_num++;
6678 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6679 || asm_noperands (PATTERN (insn)) >= 0)
6681 /* Finish bundle containing asm insn. */
6682 curr_state->after_nops_num
6683 = 3 - curr_state->accumulated_insns_num % 3;
6684 curr_state->accumulated_insns_num
6685 += 3 - curr_state->accumulated_insns_num % 3;
6687 else if (ia64_safe_type (insn) == TYPE_L)
6688 curr_state->accumulated_insns_num++;
6690 if (ia64_safe_type (insn) == TYPE_B)
6691 curr_state->branch_deviation
6692 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6693 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6695 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6697 	  state_t dfa_state;
6698 	  struct bundle_state *curr_state1;
6699 struct bundle_state *allocated_states_chain;
6701 curr_state1 = get_free_bundle_state ();
6702 dfa_state = curr_state1->dfa_state;
6703 allocated_states_chain = curr_state1->allocated_states_chain;
6704 *curr_state1 = *curr_state;
6705 curr_state1->dfa_state = dfa_state;
6706 curr_state1->allocated_states_chain = allocated_states_chain;
6707 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6709 curr_state = curr_state1;
6711 if (!try_issue_nops (curr_state,
6712 			   3 - curr_state->accumulated_insns_num % 3))
6713 	return;
6714 curr_state->after_nops_num
6715 = 3 - curr_state->accumulated_insns_num % 3;
6716 curr_state->accumulated_insns_num
6717 += 3 - curr_state->accumulated_insns_num % 3;
6719 if (!insert_bundle_state (curr_state))
6720 free_bundle_state (curr_state);
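/* Editor's note: the curr_state1 block above copies a bundle_state by value
   while keeping the copy's own dfa_state buffer and allocation link, then
   deep-copies the buffer contents.  A generic sketch of that pattern
   (illustrative names, not part of this file):  */
#if 0
#include <string.h>

struct obj { char *buf; struct obj *alloc_link; int cost; };

static void
obj_assign (struct obj *dst, const struct obj *src, size_t buf_size)
{
  char *saved_buf = dst->buf;			/* keep dst's own buffer ... */
  struct obj *saved_link = dst->alloc_link;	/* ... and its chain link */
  *dst = *src;					/* copy all scalar fields */
  dst->buf = saved_buf;
  dst->alloc_link = saved_link;
  memcpy (dst->buf, src->buf, buf_size);	/* deep-copy the contents */
}
#endif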
6724 /* The following function returns the position in the two-bundle window
6725    for the given STATE, i.e. the number of slots already filled.  */
6728 get_max_pos (state_t state)
6730   if (cpu_unit_reservation_p (state, pos_6))
6731     return 6;
6732   else if (cpu_unit_reservation_p (state, pos_5))
6733     return 5;
6734   else if (cpu_unit_reservation_p (state, pos_4))
6735     return 4;
6736   else if (cpu_unit_reservation_p (state, pos_3))
6737     return 3;
6738   else if (cpu_unit_reservation_p (state, pos_2))
6739     return 2;
6740   else if (cpu_unit_reservation_p (state, pos_1))
6741     return 1;
6742   else
6743     return 0;
6746 /* The function returns the code of a possible template for the given
6747    position and state.  The function should be called only with position
6748    values equal to 3 or 6.  */
6751 get_template (state_t state, int pos)
6756       if (cpu_unit_reservation_p (state, _0mii_))
6757 	return 0;
6758       else if (cpu_unit_reservation_p (state, _0mmi_))
6759 	return 1;
6760       else if (cpu_unit_reservation_p (state, _0mfi_))
6761 	return 2;
6762       else if (cpu_unit_reservation_p (state, _0mmf_))
6763 	return 3;
6764       else if (cpu_unit_reservation_p (state, _0bbb_))
6765 	return 4;
6766       else if (cpu_unit_reservation_p (state, _0mbb_))
6767 	return 5;
6768       else if (cpu_unit_reservation_p (state, _0mib_))
6769 	return 6;
6770       else if (cpu_unit_reservation_p (state, _0mmb_))
6771 	return 7;
6772       else if (cpu_unit_reservation_p (state, _0mfb_))
6773 	return 8;
6774       else if (cpu_unit_reservation_p (state, _0mlx_))
6775 	return 9;
6779       if (cpu_unit_reservation_p (state, _1mii_))
6780 	return 0;
6781       else if (cpu_unit_reservation_p (state, _1mmi_))
6782 	return 1;
6783       else if (cpu_unit_reservation_p (state, _1mfi_))
6784 	return 2;
6785       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6786 	return 3;
6787       else if (cpu_unit_reservation_p (state, _1bbb_))
6788 	return 4;
6789       else if (cpu_unit_reservation_p (state, _1mbb_))
6790 	return 5;
6791       else if (cpu_unit_reservation_p (state, _1mib_))
6792 	return 6;
6793       else if (cpu_unit_reservation_p (state, _1mmb_))
6794 	return 7;
6795       else if (cpu_unit_reservation_p (state, _1mfb_))
6796 	return 8;
6797       else if (cpu_unit_reservation_p (state, _1mlx_))
6798 	return 9;
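/* Editor's note: the template codes 0-9 above appear to correspond to the
   operand of `bundle_selector' in the machine description: 0 = .mii,
   1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb, 5 = .mbb, 6 = .mib, 7 = .mmb,
   8 = .mfb, 9 = .mlx.  This matches the uses below, where const0_rtx
   selects an MII bundle, const2_rtx an MFI bundle, and code 9 is tested
   for MLX.  */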
6806 /* The following function returns the first insn important for insn
6807    bundling at or after INSN and before TAIL.  */
6810 get_next_important_insn (rtx insn, rtx tail)
6812   for (; insn && insn != tail; insn = NEXT_INSN (insn))
6813     if (INSN_P (insn)
6814 	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6815 	&& GET_CODE (PATTERN (insn)) != USE
6816 	&& GET_CODE (PATTERN (insn)) != CLOBBER)
6817       return insn;
6818   return NULL_RTX;
6821 /* The following function does insn bundling.  Bundling means
6822    inserting templates and nop insns to fit insn groups into permitted
6823    templates.  Instruction scheduling uses an NDFA (non-deterministic
6824    finite automaton) encoding information about the templates and the
6825    inserted nops.  The nondeterminism of the automaton permits following
6826    all possible insn sequences very quickly.
6828    Unfortunately it is not possible to get information about inserted
6829    nop insns and used templates from the automaton states.  The
6830    automaton only says that we can issue an insn, possibly inserting
6831    some nops before it and using some template.  Therefore insn
6832    bundling in this function is implemented by using a DFA
6833    (deterministic finite automaton).  We follow all possible insn
6834    sequences by inserting 0-2 nops (that is what the NDFA describes for
6835    insn scheduling) before/after each insn being bundled.  We know the
6836    start of a simulated processor cycle from insn scheduling (an insn
6837    starting a new cycle has TImode).
6839    A naive implementation of insn bundling would create an enormous
6840    number of possible insn sequences satisfying the information about new
6841    cycle ticks taken from the insn scheduling.  To make the algorithm
6842    practical we use dynamic programming.  Each decision (about
6843    inserting nops and implicitly about previous decisions) is described
6844    by a structure bundle_state (see above).  If we generate the same
6845    bundle state (the key is the automaton state after issuing the insns
6846    and nops for it), we reuse the already generated one.  As a
6847    consequence we reject some decisions which cannot improve the
6848    solution, and we reduce the memory used by the algorithm.
6850    When we reach the end of an EBB (extended basic block), we choose the
6851    best sequence and then, moving back in the EBB, insert templates for
6852    the best alternative.  The templates are taken by querying the
6853    automaton state for each insn in the chosen bundle states.
6855    So the algorithm makes two (forward and backward) passes through the
6856    EBB.  There is an additional forward pass through the EBB for the
6857    Itanium 1 processor.  This pass inserts more nops to make the distance
6858    between a producer insn and a dependent MMMUL/MMSHF insn at least 4 cycles.  */
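/* Editor's note: schematically, the forward pass below is the following
   dynamic program (illustrative pseudo-code, not part of this file):

     for each important insn I, numbered n = 1, 2, ...
       for each state S in index_to_bundle_states[n - 1]
	 for nops in {0, 1, 2}
	   issue_nops_and_insn (S, nops, I, ...)

   where issue_nops_and_insn adds the resulting states for n, and
   insert_bundle_state merges states that coincide on (dfa_state,
   insn_num), keeping the better one.  */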
6861 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6863 struct bundle_state *curr_state, *next_state, *best_state;
6864 rtx insn, next_insn;
6866 int i, bundle_end_p, only_bundle_end_p, asm_p;
6867 int pos = 0, max_pos, template0, template1;
6870 enum attr_type type;
6873 /* Count insns in the EBB. */
6874 for (insn = NEXT_INSN (prev_head_insn);
6875 insn && insn != tail;
6876        insn = NEXT_INSN (insn))
6877     insn_num++;
6878   if (insn_num == 0)
6879     return;
6882 dfa_clean_insn_cache ();
6883 initiate_bundle_state_table ();
6884 index_to_bundle_states = xmalloc ((insn_num + 2)
6885 * sizeof (struct bundle_state *));
6886 /* First (forward) pass -- generation of bundle states. */
6887 curr_state = get_free_bundle_state ();
6888 curr_state->insn = NULL;
6889 curr_state->before_nops_num = 0;
6890 curr_state->after_nops_num = 0;
6891 curr_state->insn_num = 0;
6892 curr_state->cost = 0;
6893 curr_state->accumulated_insns_num = 0;
6894 curr_state->branch_deviation = 0;
6895 curr_state->next = NULL;
6896 curr_state->originator = NULL;
6897 state_reset (curr_state->dfa_state);
6898 index_to_bundle_states [0] = curr_state;
6900   /* Shift the cycle mark if it is put on an insn which could be ignored.  */
6901   for (insn = NEXT_INSN (prev_head_insn);
6902        insn != tail;
6903        insn = NEXT_INSN (insn))
6904     if (INSN_P (insn)
6905 	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6906 || GET_CODE (PATTERN (insn)) == USE
6907 || GET_CODE (PATTERN (insn)) == CLOBBER)
6908 && GET_MODE (insn) == TImode)
6910 PUT_MODE (insn, VOIDmode);
6911 for (next_insn = NEXT_INSN (insn);
6913 next_insn = NEXT_INSN (next_insn))
6914 if (INSN_P (next_insn)
6915 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6916 && GET_CODE (PATTERN (next_insn)) != USE
6917 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6919 PUT_MODE (next_insn, TImode);
6923   /* Forward pass: generation of bundle states.  */
6924   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6925        insn != NULL_RTX;
6926        insn = next_insn)
6928       if (!INSN_P (insn)
6929 	  || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6930 	  || GET_CODE (PATTERN (insn)) == USE
6931 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6932 	abort ();
6933 type = ia64_safe_type (insn);
6934 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6935       insn_num++;
6936       index_to_bundle_states [insn_num] = NULL;
6937       for (curr_state = index_to_bundle_states [insn_num - 1];
6938 	   curr_state != NULL;
6939 	   curr_state = next_state)
6941 pos = curr_state->accumulated_insns_num % 3;
6942 next_state = curr_state->next;
6943 	  /* We must fill up the current bundle in order to start a
6944 	     subsequent asm insn in a new bundle.  An asm insn is always
6945 	     placed in a separate bundle.  */
6946 	  only_bundle_end_p
6947 	    = (next_insn != NULL_RTX
6948 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6949 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6950 /* We may fill up the current bundle if it is the cycle end
6951 without a group barrier. */
6952 	  bundle_end_p
6953 	    = (only_bundle_end_p || next_insn == NULL_RTX
6954 || (GET_MODE (next_insn) == TImode
6955 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6956 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6958 /* We need to insert 2 nops for cases like M_MII. To
6959 guarantee issuing all insns on the same cycle for
6960 Itanium 1, we need to issue 2 nops after the first M
6961 insn (MnnMII where n is a nop insn). */
6962 || ((type == TYPE_M || type == TYPE_A)
6963 && ia64_tune == PROCESSOR_ITANIUM
6964 && !bundle_end_p && pos == 1))
6965 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6966 				 only_bundle_end_p);
6967 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6968 			       only_bundle_end_p);
6969 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6970 			       only_bundle_end_p);
6972       if (index_to_bundle_states [insn_num] == NULL)
6973 	abort ();
6974       for (curr_state = index_to_bundle_states [insn_num];
6975 	   curr_state != NULL;
6976 	   curr_state = curr_state->next)
6977 if (verbose >= 2 && dump)
6979 	  /* This structure is taken from the generated code of the
6980 	     pipeline hazard recognizer (see file insn-attrtab.c).
6981 	     Please don't forget to change the structure if a new
6982 	     automaton is added to the .md file.  */
6985 unsigned short one_automaton_state;
6986 unsigned short oneb_automaton_state;
6987 unsigned short two_automaton_state;
6988 unsigned short twob_automaton_state;
6993 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6994 curr_state->unique_num,
6995 (curr_state->originator == NULL
6996 ? -1 : curr_state->originator->unique_num),
6998 curr_state->before_nops_num, curr_state->after_nops_num,
6999 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7000 (ia64_tune == PROCESSOR_ITANIUM
7001 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7002 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7006 if (index_to_bundle_states [insn_num] == NULL)
7007     /* We should find a solution because the 2nd insn scheduling has
7008        found one.  */
7009     abort ();
7010   /* Find a state corresponding to the best insn sequence.  */
7011   best_state = NULL;
7012   for (curr_state = index_to_bundle_states [insn_num];
7013        curr_state != NULL;
7014        curr_state = curr_state->next)
7015     /* We are just looking at the states with a fully filled up last
7016        bundle.  First we prefer insn sequences with minimal cost, then
7017        those with the minimal number of inserted nops, and finally those
7018        with branch insns placed in the 3rd slots.  */
7019 if (curr_state->accumulated_insns_num % 3 == 0
7020 && (best_state == NULL || best_state->cost > curr_state->cost
7021 || (best_state->cost == curr_state->cost
7022 && (curr_state->accumulated_insns_num
7023 < best_state->accumulated_insns_num
7024 || (curr_state->accumulated_insns_num
7025 == best_state->accumulated_insns_num
7026 && curr_state->branch_deviation
7027 < best_state->branch_deviation)))))
7028 best_state = curr_state;
7029 /* Second (backward) pass: adding nops and templates. */
7030 insn_num = best_state->before_nops_num;
7031 template0 = template1 = -1;
7032 for (curr_state = best_state;
7033 curr_state->originator != NULL;
7034 curr_state = curr_state->originator)
7036 insn = curr_state->insn;
7037 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7038 || asm_noperands (PATTERN (insn)) >= 0);
7040 if (verbose >= 2 && dump)
7044 unsigned short one_automaton_state;
7045 unsigned short oneb_automaton_state;
7046 unsigned short two_automaton_state;
7047 unsigned short twob_automaton_state;
7052 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7053 curr_state->unique_num,
7054 (curr_state->originator == NULL
7055 ? -1 : curr_state->originator->unique_num),
7057 curr_state->before_nops_num, curr_state->after_nops_num,
7058 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7059 (ia64_tune == PROCESSOR_ITANIUM
7060 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7061 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7064       /* Find the position in the current bundle window.  The window can
7065 	 contain at most two bundles.  A two-bundle window means that
7066 	 the processor will make two bundle rotations.  */
7067 max_pos = get_max_pos (curr_state->dfa_state);
7068       if (max_pos == 6
7069 	  /* The following (negative template number) means that the
7070 	     processor did one bundle rotation.  */
7071 	  || (max_pos == 3 && template0 < 0))
7073 	  /* We are at the end of the window -- find template(s) for
7074 	     its bundle(s).  */
7075 	  pos = max_pos;
7076 	  if (max_pos == 3)
7077 template0 = get_template (curr_state->dfa_state, 3);
7080 template1 = get_template (curr_state->dfa_state, 3);
7081 template0 = get_template (curr_state->dfa_state, 6);
7084 if (max_pos > 3 && template1 < 0)
7085 /* It may happen when we have the stop inside a bundle. */
7089 template1 = get_template (curr_state->dfa_state, 3);
7093 /* Emit nops after the current insn. */
7094 for (i = 0; i < curr_state->after_nops_num; i++)
7096 	  nop = gen_nop ();
7097 	  emit_insn_after (nop, insn);
7103 /* We are at the start of a bundle: emit the template
7104 (it should be defined). */
7107 b = gen_bundle_selector (GEN_INT (template0));
7108 ia64_emit_insn_before (b, nop);
7109 	      /* If we have a two-bundle window, we make one bundle
7110 		 rotation.  Otherwise template0 will be undefined
7111 		 (a negative value).  */
7112 template0 = template1;
7116       /* Move the position backward in the window.  A group barrier has
7117 	 no slot.  An asm insn takes a whole bundle.  */
7118 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7119 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7120 && asm_noperands (PATTERN (insn)) < 0)
7122 	  /* A long (L) insn takes 2 slots.  */
7123 if (ia64_safe_type (insn) == TYPE_L)
7128 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7129 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7130 && asm_noperands (PATTERN (insn)) < 0)
7132 	  /* The current insn is at the bundle start: emit the
7133 	     template.  */
7136 b = gen_bundle_selector (GEN_INT (template0));
7137 ia64_emit_insn_before (b, insn);
7138 b = PREV_INSN (insn);
7140 	  /* See the comment above in the analogous place for emitting nops
7141 	     after the insn.  */
7142 template0 = template1;
7145       /* Emit nops before the current insn.  */
7146       for (i = 0; i < curr_state->before_nops_num; i++)
7148 	  nop = gen_nop ();
7149 	  ia64_emit_insn_before (nop, insn);
7150 	  nop = PREV_INSN (insn);
7151 	  insn = nop;
7157 	      /* See the comment above in the analogous place for emitting
7158 		 nops after the insn.  */
7161 b = gen_bundle_selector (GEN_INT (template0));
7162 ia64_emit_insn_before (b, insn);
7163 b = PREV_INSN (insn);
7165 template0 = template1;
7170   if (ia64_tune == PROCESSOR_ITANIUM)
7171     /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7172        Itanium 1 has a strange design: if the distance between an insn
7173        and a dependent MM-insn is less than 4 cycles, we get an extra
7174        stall of 6 cycles.  So we make the distance equal to 4 cycles
7175        if it is less.  */
7176     for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7177 	 insn != NULL_RTX;
7178 	 insn = next_insn)
7180 	if (!INSN_P (insn)
7181 	    || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7182 	    || GET_CODE (PATTERN (insn)) == USE
7183 	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7184 	  abort ();
7185 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7186 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7187 	/* We found an MM-insn which needs additional cycles.  */
7193 	  /* Now we are searching for the template of the bundle in
7194 	     which the MM-insn is placed and for the position of the
7195 	     insn in the bundle (0, 1, 2).  We also check whether
7196 	     there is a stop before the insn.  */
7197 last = prev_active_insn (insn);
7198 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7199 	  if (pred_stop_p)
7200 	    last = prev_active_insn (last);
7202 for (;; last = prev_active_insn (last))
7203 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7205 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7206 	      if (template0 == 9)
7207 		/* The insn is in an MLX bundle.  Change the template
7208 		   to MFI because we will add nops before the
7209 		   insn.  This simplifies the subsequent code a lot.  */
7210 		PATTERN (last)
7211 		  = gen_bundle_selector (const2_rtx); /* -> MFI */
7212 	      break;
7214 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7215 && (ia64_safe_itanium_class (last)
7216 		     != ITANIUM_CLASS_IGNORE))
7217 	      n++;
7218 	  /* Some correctness checks: the stop is not at the
7219 	     bundle start, there are no more than 3 insns in the bundle,
7220 	     and the MM-insn is not at the start of a bundle with
7221 	     template MLX.  */
7222 if ((pred_stop_p && n == 0) || n > 2
7223 || (template0 == 9 && n != 0))
7225 /* Put nops after the insn in the bundle. */
7226 for (j = 3 - n; j > 0; j --)
7227 ia64_emit_insn_before (gen_nop (), insn);
7228 	  /* This takes into account that we will add N more nops
7229 	     before the insn later -- please see the code below.  */
7230 add_cycles [INSN_UID (insn)]--;
7231 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7232 	    ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7233 				   insn);
7235 add_cycles [INSN_UID (insn)]--;
7236 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7238 /* Insert "MII;" template. */
7239 	      ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7240 				     insn);
7241 ia64_emit_insn_before (gen_nop (), insn);
7242 	      ia64_emit_insn_before (gen_nop (), insn);
7243 	      if (i > 1)
7245 		  /* To decrease code size, we use "MI;I;"
7246 		     templates.  */
7247 		  ia64_emit_insn_before
7248 		    (gen_insn_group_barrier (GEN_INT (3)), insn);
7249 		  i--;
7251 ia64_emit_insn_before (gen_nop (), insn);
7252 	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7253 				     insn);
7255 /* Put the MM-insn in the same slot of a bundle with the
7256 same template as the original one. */
7257 	  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7258 				 insn);
7259 	  /* To put the insn in the same slot, add the necessary number
7260 	     of nops.  */
7261 for (j = n; j > 0; j --)
7262 ia64_emit_insn_before (gen_nop (), insn);
7263 /* Put the stop if the original bundle had it. */
7264 	  if (pred_stop_p)
7265 	    ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7266 				   insn);
7269 free (index_to_bundle_states);
7270 finish_bundle_state_table ();
7272 dfa_clean_insn_cache ();
7275 /* The following function is called at the end of scheduling a BB or
7276    EBB.  After reload, it inserts stop bits and does insn bundling.  */
7279 ia64_sched_finish (FILE *dump, int sched_verbose)
7282 fprintf (dump, "// Finishing schedule.\n");
7283   if (!reload_completed)
7284     return;
7285 if (reload_completed)
7287 final_emit_insn_group_barriers (dump);
7288 bundling (dump, sched_verbose, current_sched_info->prev_head,
7289 current_sched_info->next_tail);
7290 if (sched_verbose && dump)
7291 fprintf (dump, "// finishing %d-%d\n",
7292 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7293 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7299 /* The following function inserts stop bits in a scheduled BB or EBB.  */
7302 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7305 int need_barrier_p = 0;
7306 rtx prev_insn = NULL_RTX;
7308 init_insn_group_barriers ();
7310 for (insn = NEXT_INSN (current_sched_info->prev_head);
7311 insn != current_sched_info->next_tail;
7312 insn = NEXT_INSN (insn))
7314 if (GET_CODE (insn) == BARRIER)
7316 	  rtx last = prev_active_insn (insn);
7318 	  if (! last)
7319 	    continue;
7320 if (GET_CODE (last) == JUMP_INSN
7321 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7322 last = prev_active_insn (last);
7323 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7324 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7326 	  init_insn_group_barriers ();
7327 	  need_barrier_p = 0;
7328 	  prev_insn = NULL_RTX;
7330 else if (INSN_P (insn))
7332 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7334 	      init_insn_group_barriers ();
7335 	      need_barrier_p = 0;
7336 	      prev_insn = NULL_RTX;
7338 else if (need_barrier_p || group_barrier_needed_p (insn))
7340 if (TARGET_EARLY_STOP_BITS)
7342 	      rtx last;
7344 	      for (last = insn;
7345 		   last != current_sched_info->prev_head;
7346 last = PREV_INSN (last))
7347 if (INSN_P (last) && GET_MODE (last) == TImode
7348 		    && stops_p [INSN_UID (last)])
7349 		  break;
7350 	      if (last == current_sched_info->prev_head)
7351 		last = insn;
7352 last = prev_active_insn (last);
7353 	      if (last
7354 		  && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7355 		emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7356 				 last);
7357 init_insn_group_barriers ();
7358 	      for (last = NEXT_INSN (last);
7359 		   last != insn;
7360 		   last = NEXT_INSN (last))
7361 		if (INSN_P (last))
7362 		  group_barrier_needed_p (last);
7366 		emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7367 				  insn);
7368 init_insn_group_barriers ();
7370 group_barrier_needed_p (insn);
7371 prev_insn = NULL_RTX;
7373 	  else if (recog_memoized (insn) >= 0)
7374 	    prev_insn = insn;
7375 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7376 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7377 || asm_noperands (PATTERN (insn)) >= 0);
7384 /* The following function returns the lookahead depth used by the
7385    multipass DFA insn scheduler.  */
7387 static int
7388 ia64_first_cycle_multipass_dfa_lookahead (void)
7390 return (reload_completed ? 6 : 4);
7393 /* The following function initializes the variables `dfa_pre_cycle_insn'
7394    and `dfa_stop_insn'.  */
7395 static void
7396 ia64_init_dfa_pre_cycle_insn (void)
7398 if (temp_dfa_state == NULL)
7400 dfa_state_size = state_size ();
7401 temp_dfa_state = xmalloc (dfa_state_size);
7402 prev_cycle_state = xmalloc (dfa_state_size);
7404 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7405 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7406 recog_memoized (dfa_pre_cycle_insn);
7407 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7408 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7409 recog_memoized (dfa_stop_insn);
7412 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7413 used by the DFA insn scheduler. */
7416 ia64_dfa_pre_cycle_insn (void)
7418 return dfa_pre_cycle_insn;
7421 /* The following function returns TRUE if PRODUCER (of type ilog or
7422    ld) produces an address for CONSUMER (of type st or stf).  */
7425 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7429   if (producer == NULL_RTX || consumer == NULL_RTX)
7430     abort ();
7431 dest = ia64_single_set (producer);
7432 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7433       || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7434     return 0;
7435 if (GET_CODE (reg) == SUBREG)
7436 reg = SUBREG_REG (reg);
7437 dest = ia64_single_set (consumer);
7438 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7439       || GET_CODE (mem) != MEM)
7440     return 0;
7441 return reg_mentioned_p (reg, mem);
7444 /* The following function returns TRUE if PRODUCER (of type ilog or
7445    ld) produces an address for CONSUMER (of type ld or fld).  */
7448 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7450 rtx dest, src, reg, mem;
7452   if (producer == NULL_RTX || consumer == NULL_RTX)
7453     abort ();
7454 dest = ia64_single_set (producer);
7455 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7456       || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7457     return 0;
7458 if (GET_CODE (reg) == SUBREG)
7459 reg = SUBREG_REG (reg);
7460 src = ia64_single_set (consumer);
7461   if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7462     return 0;
7463 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7464 mem = XVECEXP (mem, 0, 0);
7465 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7466 mem = XEXP (mem, 0);
7468 /* Note that LO_SUM is used for GOT loads. */
7469   if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7470     return 0;
7472 return reg_mentioned_p (reg, mem);
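/* Editor's note: the two predicates above are meant to be used as guard
   functions of `define_bypass' clauses in the machine description, along
   the lines of (illustrative, not the exact ia64.md text):

     (define_bypass 0 "2_ilog" "2_st" "ia64_st_address_bypass_p")

   so that the special latency applies only when the producer really
   computes the address used by the consumer.  */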
7475 /* The following function returns TRUE if INSN produces an address for a
7476    load/store insn.  We will place such insns into an M slot because it
7477    decreases their latency time.  */
7480 ia64_produce_address_p (rtx insn)
7486 /* Emit pseudo-ops for the assembler to describe predicate relations.
7487 At present this assumes that we only consider predicate pairs to
7488 be mutex, and that the assembler can deduce proper values from
7489 straight-line code. */
7492 emit_predicate_relation_info (void)
7496 FOR_EACH_BB_REVERSE (bb)
7499 rtx head = BB_HEAD (bb);
7501 /* We only need such notes at code labels. */
7502 if (GET_CODE (head) != CODE_LABEL)
7504 if (GET_CODE (NEXT_INSN (head)) == NOTE
7505 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7506 head = NEXT_INSN (head);
7508 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7509 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7511 rtx p = gen_rtx_REG (BImode, r);
7512 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7513 if (head == BB_END (bb))
7519   /* Look for conditional calls that do not return, and protect predicate
7520      relations around them.  Otherwise the assembler will assume the call
7521      returns, and complain about uses of call-clobbered predicates after
7522      the call.  */
7523 FOR_EACH_BB_REVERSE (bb)
7525 rtx insn = BB_HEAD (bb);
7529 if (GET_CODE (insn) == CALL_INSN
7530 && GET_CODE (PATTERN (insn)) == COND_EXEC
7531 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7533 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7534 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7535 if (BB_HEAD (bb) == insn)
7537 if (BB_END (bb) == insn)
7541 if (insn == BB_END (bb))
7543 insn = NEXT_INSN (insn);
7548 /* Perform machine dependent operations on the rtl chain INSNS. */
7553 /* We are freeing block_for_insn in the toplev to keep compatibility
7554 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7555 compute_bb_for_insn ();
7557 /* If optimizing, we'll have split before scheduling. */
7559 split_all_insns (0);
7561 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7562 non-optimizing bootstrap. */
7563 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7565 if (ia64_flag_schedule_insns2)
7567 timevar_push (TV_SCHED2);
7568 ia64_final_schedule = 1;
7570 initiate_bundle_states ();
7571 ia64_nop = make_insn_raw (gen_nop ());
7572 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7573 recog_memoized (ia64_nop);
7574 clocks_length = get_max_uid () + 1;
7575 stops_p = xcalloc (1, clocks_length);
7576 if (ia64_tune == PROCESSOR_ITANIUM)
7578 clocks = xcalloc (clocks_length, sizeof (int));
7579 add_cycles = xcalloc (clocks_length, sizeof (int));
7581 if (ia64_tune == PROCESSOR_ITANIUM2)
7583 pos_1 = get_cpu_unit_code ("2_1");
7584 pos_2 = get_cpu_unit_code ("2_2");
7585 pos_3 = get_cpu_unit_code ("2_3");
7586 pos_4 = get_cpu_unit_code ("2_4");
7587 pos_5 = get_cpu_unit_code ("2_5");
7588 pos_6 = get_cpu_unit_code ("2_6");
7589 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7590 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7591 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7592 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7593 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7594 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7595 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7596 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7597 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7598 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7599 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7600 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7601 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7602 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7603 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7604 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7605 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7606 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7607 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7608 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7612 pos_1 = get_cpu_unit_code ("1_1");
7613 pos_2 = get_cpu_unit_code ("1_2");
7614 pos_3 = get_cpu_unit_code ("1_3");
7615 pos_4 = get_cpu_unit_code ("1_4");
7616 pos_5 = get_cpu_unit_code ("1_5");
7617 pos_6 = get_cpu_unit_code ("1_6");
7618 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7619 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7620 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7621 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7622 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7623 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7624 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7625 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7626 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7627 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7628 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7629 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7630 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7631 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7632 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7633 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7634 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7635 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7636 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7637 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7639 schedule_ebbs (dump_file);
7640 finish_bundle_states ();
7641 if (ia64_tune == PROCESSOR_ITANIUM)
7647 emit_insn_group_barriers (dump_file);
7649 ia64_final_schedule = 0;
7650 timevar_pop (TV_SCHED2);
7653 emit_all_insn_group_barriers (dump_file);
7655   /* A call must not be the last instruction in a function, so that the
7656      return address is still within the function and unwinding works
7657      properly.  Note that IA-64 differs from dwarf2 on this point.  */
7658 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7663 insn = get_last_insn ();
7664 if (! INSN_P (insn))
7665 insn = prev_active_insn (insn);
7666 /* Skip over insns that expand to nothing. */
7667 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7669 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7670 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7672 insn = prev_active_insn (insn);
7674 if (GET_CODE (insn) == CALL_INSN)
7677 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7678 emit_insn (gen_break_f ());
7679 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7684 emit_predicate_relation_info ();
7686 if (ia64_flag_var_tracking)
7688 timevar_push (TV_VAR_TRACKING);
7689 variable_tracking_main ();
7690 timevar_pop (TV_VAR_TRACKING);
7694 /* Return true if REGNO is used by the epilogue. */
7697 ia64_epilogue_uses (int regno)
7702 /* With a call to a function in another module, we will write a new
7703 value to "gp". After returning from such a call, we need to make
7704 sure the function restores the original gp-value, even if the
7705 function itself does not use the gp anymore. */
7706 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7708 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7709 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7710 /* For functions defined with the syscall_linkage attribute, all
7711 input registers are marked as live at all function exits. This
7712 prevents the register allocator from using the input registers,
7713 which in turn makes it possible to restart a system call after
7714 an interrupt without having to save/restore the input registers.
7715 This also prevents kernel data from leaking to application code. */
7716 return lookup_attribute ("syscall_linkage",
7717 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7720 /* Conditional return patterns can't represent the use of `b0' as
7721 the return address, so we force the value live this way. */
7725 /* Likewise for ar.pfs, which is used by br.ret. */
7733 /* Return true if REGNO is used by the frame unwinder. */
7736 ia64_eh_uses (int regno)
7738 if (! reload_completed)
7741 if (current_frame_info.reg_save_b0
7742 && regno == current_frame_info.reg_save_b0)
7744 if (current_frame_info.reg_save_pr
7745 && regno == current_frame_info.reg_save_pr)
7747 if (current_frame_info.reg_save_ar_pfs
7748 && regno == current_frame_info.reg_save_ar_pfs)
7750 if (current_frame_info.reg_save_ar_unat
7751 && regno == current_frame_info.reg_save_ar_unat)
7753 if (current_frame_info.reg_save_ar_lc
7754 && regno == current_frame_info.reg_save_ar_lc)
7760 /* Return true if this goes in small data/bss.  */
7762 /* ??? We could also support our own long data here, generating
7763    movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
7764    should make the code faster because there is one less load.  This
7765    also includes incomplete types which can't go in sdata/sbss.  */
7768 ia64_in_small_data_p (tree exp)
7770 if (TARGET_NO_SDATA)
7773 /* We want to merge strings, so we never consider them small data. */
7774 if (TREE_CODE (exp) == STRING_CST)
7777 /* Functions are never small data. */
7778 if (TREE_CODE (exp) == FUNCTION_DECL)
7781 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7783 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7784 if (strcmp (section, ".sdata") == 0
7785 || strcmp (section, ".sbss") == 0)
7790 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7792 /* If this is an incomplete type with size 0, then we can't put it
7793 in sdata because it might be too big when completed. */
7794 if (size > 0 && size <= ia64_section_threshold)
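/* Editor's note: an object ends up in .sdata/.sbss either through an
   explicit section attribute or via the size test above, e.g.
   (illustrative user code):

     int counter __attribute__ ((section (".sdata")));

   For the size path, 0 < size <= ia64_section_threshold must hold.  */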
7801 /* Output assembly directives for prologue regions. */
7803 /* The current basic block number. */
7805 static bool last_block;
7807 /* True if we need a copy_state command at the start of the next block. */
7809 static bool need_copy_state;
7811 /* The function emits unwind directives for the start of an epilogue. */
7814 process_epilogue (void)
7816 /* If this isn't the last block of the function, then we need to label the
7817 current state, and copy it back in at the start of the next block. */
7821 fprintf (asm_out_file, "\t.label_state 1\n");
7822 need_copy_state = true;
7825 fprintf (asm_out_file, "\t.restore sp\n");
7828 /* This function processes a SET pattern, looking for the specific
7829    patterns which result in emitting an assembly directive required
7830    for unwinding.  */
7832 process_set (FILE *asm_out_file, rtx pat)
7834 rtx src = SET_SRC (pat);
7835 rtx dest = SET_DEST (pat);
7836 int src_regno, dest_regno;
7838 /* Look for the ALLOC insn. */
7839 if (GET_CODE (src) == UNSPEC_VOLATILE
7840 && XINT (src, 1) == UNSPECV_ALLOC
7841 && GET_CODE (dest) == REG)
7843 dest_regno = REGNO (dest);
7845 /* If this isn't the final destination for ar.pfs, the alloc
7846 shouldn't have been marked frame related. */
7847 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7850 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7851 ia64_dbx_register_number (dest_regno));
7855 /* Look for SP = .... */
7856 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7858 if (GET_CODE (src) == PLUS)
7860 rtx op0 = XEXP (src, 0);
7861 rtx op1 = XEXP (src, 1);
7862 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7864 	      if (INTVAL (op1) < 0)
7865 		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7866 			 -INTVAL (op1));
7867 	      else
7868 		process_epilogue ();
7873 else if (GET_CODE (src) == REG
7874 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7875 process_epilogue ();
7882 /* Register move we need to look at. */
7883 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7885 src_regno = REGNO (src);
7886 dest_regno = REGNO (dest);
7888       switch (src_regno)
7890 	case BR_REG (0):
7891 	  /* Saving return address pointer.  */
7892 	  if (dest_regno != current_frame_info.reg_save_b0)
7894 fprintf (asm_out_file, "\t.save rp, r%d\n",
7895 ia64_dbx_register_number (dest_regno));
7896 	  return 1;
7898 	case PR_REG (0):
7899 	  if (dest_regno != current_frame_info.reg_save_pr)
7901 fprintf (asm_out_file, "\t.save pr, r%d\n",
7902 ia64_dbx_register_number (dest_regno));
7905 case AR_UNAT_REGNUM:
7906 if (dest_regno != current_frame_info.reg_save_ar_unat)
7908 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7909 ia64_dbx_register_number (dest_regno));
7910 	  return 1;
7912 	case AR_LC_REGNUM:
7913 	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7915 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7916 ia64_dbx_register_number (dest_regno));
7919 case STACK_POINTER_REGNUM:
7920 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7921 || ! frame_pointer_needed)
7923 fprintf (asm_out_file, "\t.vframe r%d\n",
7924 ia64_dbx_register_number (dest_regno));
7928 /* Everything else should indicate being stored to memory. */
7933 /* Memory store we need to look at. */
7934 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7940 if (GET_CODE (XEXP (dest, 0)) == REG)
7942 	  base = XEXP (dest, 0);
7943 	  off = 0;
7945 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7946 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7948 base = XEXP (XEXP (dest, 0), 0);
7949 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7954 if (base == hard_frame_pointer_rtx)
7956 saveop = ".savepsp";
7959 else if (base == stack_pointer_rtx)
7964 src_regno = REGNO (src);
7965       switch (src_regno)
7967 	case BR_REG (0):
7968 	  if (current_frame_info.reg_save_b0 != 0)
7969 	    abort ();
7970 	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7971 	  return 1;
7973 	case PR_REG (0):
7974 	  if (current_frame_info.reg_save_pr != 0)
7975 	    abort ();
7976 	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7977 	  return 1;
7979 	case AR_LC_REGNUM:
7980 	  if (current_frame_info.reg_save_ar_lc != 0)
7981 	    abort ();
7982 	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7983 	  return 1;
7985 	case AR_PFS_REGNUM:
7986 	  if (current_frame_info.reg_save_ar_pfs != 0)
7987 	    abort ();
7988 	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7989 	  return 1;
7991 case AR_UNAT_REGNUM:
7992 	  if (current_frame_info.reg_save_ar_unat != 0)
7993 	    abort ();
7994 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
8001 fprintf (asm_out_file, "\t.save.g 0x%x\n",
8002 1 << (src_regno - GR_REG (4)));
8010 fprintf (asm_out_file, "\t.save.b 0x%x\n",
8011 1 << (src_regno - BR_REG (1)));
8018 fprintf (asm_out_file, "\t.save.f 0x%x\n",
8019 1 << (src_regno - FR_REG (2)));
8022 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8023 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8024 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8025 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8026 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
8027 1 << (src_regno - FR_REG (12)));
8039 /* This function looks at a single insn and emits any directives
8040 required to unwind this insn. */
8042 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
8044 if (flag_unwind_tables
8045 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8049 if (GET_CODE (insn) == NOTE
8050 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
8052 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
8054 /* Restore unwind state from immediately before the epilogue. */
8055 if (need_copy_state)
8057 fprintf (asm_out_file, "\t.body\n");
8058 fprintf (asm_out_file, "\t.copy_state 1\n");
8059 need_copy_state = false;
8063       if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8064 	return;
8066 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8067       if (pat)
8068 	pat = XEXP (pat, 0);
8069       else
8070 	pat = PATTERN (insn);
8072 switch (GET_CODE (pat))
8074 	case SET:
8075 	  process_set (asm_out_file, pat);
8076 	  break;
8078 	case PARALLEL:
8080 	    int par_index;
8081 	    int limit = XVECLEN (pat, 0);
8082 for (par_index = 0; par_index < limit; par_index++)
8084 rtx x = XVECEXP (pat, 0, par_index);
8085 if (GET_CODE (x) == SET)
8086 process_set (asm_out_file, x);
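/* Editor's note: for a simple frame the directives emitted by process_set
   above come out in the assembly roughly like this (illustrative only):

	.save ar.pfs, r35
	.fframe 16
	.save rp, r34
	...
	.restore sp

   Each directive tells the unwinder where a resource was saved or how
   the frame was set up.  */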
8099 ia64_init_builtins (void)
8101 tree psi_type_node = build_pointer_type (integer_type_node);
8102 tree pdi_type_node = build_pointer_type (long_integer_type_node);
8104 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
8105 tree si_ftype_psi_si_si
8106 = build_function_type_list (integer_type_node,
8107 psi_type_node, integer_type_node,
8108 integer_type_node, NULL_TREE);
8110 /* __sync_val_compare_and_swap_di */
8111 tree di_ftype_pdi_di_di
8112 = build_function_type_list (long_integer_type_node,
8113 pdi_type_node, long_integer_type_node,
8114 long_integer_type_node, NULL_TREE);
8115 /* __sync_bool_compare_and_swap_di */
8116 tree si_ftype_pdi_di_di
8117 = build_function_type_list (integer_type_node,
8118 pdi_type_node, long_integer_type_node,
8119 long_integer_type_node, NULL_TREE);
8120 /* __sync_synchronize */
8121 tree void_ftype_void
8122 = build_function_type (void_type_node, void_list_node);
8124 /* __sync_lock_test_and_set_si */
8125 tree si_ftype_psi_si
8126 = build_function_type_list (integer_type_node,
8127 psi_type_node, integer_type_node, NULL_TREE);
8129 /* __sync_lock_test_and_set_di */
8130 tree di_ftype_pdi_di
8131 = build_function_type_list (long_integer_type_node,
8132 				pdi_type_node, long_integer_type_node,
8133 				NULL_TREE);
8135   /* __sync_lock_release_si */
8136   tree void_ftype_psi
8137     = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
8139   /* __sync_lock_release_di */
8140   tree void_ftype_pdi
8141     = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
8143   tree fpreg_type;
8144   tree float80_type;
8146   /* The __fpreg type.  */
8147 fpreg_type = make_node (REAL_TYPE);
8148 /* ??? The back end should know to load/save __fpreg variables using
8149 the ldf.fill and stf.spill instructions. */
8150 TYPE_PRECISION (fpreg_type) = 96;
8151 layout_type (fpreg_type);
8152 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8154 /* The __float80 type. */
8155 float80_type = make_node (REAL_TYPE);
8156 TYPE_PRECISION (float80_type) = 96;
8157 layout_type (float80_type);
8158 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8160   /* The __float128 type.  */
8161   if (!TARGET_HPUX)
8163       tree float128_type = make_node (REAL_TYPE);
8164 TYPE_PRECISION (float128_type) = 128;
8165 layout_type (float128_type);
8166 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8168   else
8169     /* Under HPUX, this is a synonym for "long double".  */
8170 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8173 #define def_builtin(name, type, code) \
8174 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
8176 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
8177 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
8178 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
8179 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
8180 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
8181 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
8182 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
8183 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
8185 def_builtin ("__sync_synchronize", void_ftype_void,
8186 IA64_BUILTIN_SYNCHRONIZE);
8188 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8189 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8190 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8191 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8192 def_builtin ("__sync_lock_release_si", void_ftype_psi,
8193 IA64_BUILTIN_LOCK_RELEASE_SI);
8194 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8195 IA64_BUILTIN_LOCK_RELEASE_DI);
8197 def_builtin ("__builtin_ia64_bsp",
8198 	       build_function_type (ptr_type_node, void_list_node),
8199 	       IA64_BUILTIN_BSP);
8201 def_builtin ("__builtin_ia64_flushrs",
8202 build_function_type (void_type_node, void_list_node),
8203 IA64_BUILTIN_FLUSHRS);
8205 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8206 IA64_BUILTIN_FETCH_AND_ADD_SI);
8207 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8208 IA64_BUILTIN_FETCH_AND_SUB_SI);
8209 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8210 IA64_BUILTIN_FETCH_AND_OR_SI);
8211 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8212 IA64_BUILTIN_FETCH_AND_AND_SI);
8213 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8214 IA64_BUILTIN_FETCH_AND_XOR_SI);
8215 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8216 IA64_BUILTIN_FETCH_AND_NAND_SI);
8218 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8219 IA64_BUILTIN_ADD_AND_FETCH_SI);
8220 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8221 IA64_BUILTIN_SUB_AND_FETCH_SI);
8222 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8223 IA64_BUILTIN_OR_AND_FETCH_SI);
8224 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8225 IA64_BUILTIN_AND_AND_FETCH_SI);
8226 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8227 IA64_BUILTIN_XOR_AND_FETCH_SI);
8228 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8229 IA64_BUILTIN_NAND_AND_FETCH_SI);
8231 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8232 IA64_BUILTIN_FETCH_AND_ADD_DI);
8233 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8234 IA64_BUILTIN_FETCH_AND_SUB_DI);
8235 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8236 IA64_BUILTIN_FETCH_AND_OR_DI);
8237 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8238 IA64_BUILTIN_FETCH_AND_AND_DI);
8239 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8240 IA64_BUILTIN_FETCH_AND_XOR_DI);
8241 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8242 IA64_BUILTIN_FETCH_AND_NAND_DI);
8244 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8245 IA64_BUILTIN_ADD_AND_FETCH_DI);
8246 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8247 IA64_BUILTIN_SUB_AND_FETCH_DI);
8248 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8249 IA64_BUILTIN_OR_AND_FETCH_DI);
8250 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8251 IA64_BUILTIN_AND_AND_FETCH_DI);
8252 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8253 IA64_BUILTIN_XOR_AND_FETCH_DI);
8254 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8255 IA64_BUILTIN_NAND_AND_FETCH_DI);
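/* Editor's note: a minimal usage sketch of the spin-lock builtins defined
   above (illustrative user code, not part of GCC):  */
#if 0
static int lock;

static void
enter (void)
{
  /* Atomically set the lock to 1; spin while the old value was 1.  */
  while (__sync_lock_test_and_set_si (&lock, 1) != 0)
    ;
}

static void
leave (void)
{
  /* Release: store 0 with release semantics (st.rel).  */
  __sync_lock_release_si (&lock);
}
#endif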
8260 /* Expand fetch_and_op intrinsics.  The basic code sequence is:
8262      mf
8263      tmp = [ptr];
8264      do {
8265        ret = tmp;
8266        ar.ccv = tmp;
8267        tmp <op>= value;
8268        cmpxchgsz.acq tmp = [ptr], tmp
8269      } while (tmp != ret)
8270 */
8272 static rtx
8273 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8274 tree arglist, rtx target)
8276   rtx ret, label, tmp, ccv, insn, mem, value;
8277   tree arg0, arg1;
8279   arg0 = TREE_VALUE (arglist);
8280 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8281 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8282 #ifdef POINTERS_EXTEND_UNSIGNED
8283 if (GET_MODE(mem) != Pmode)
8284     mem = convert_memory_address (Pmode, mem);
8285 #endif
8286 value = expand_expr (arg1, NULL_RTX, mode, 0);
8288 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8289 MEM_VOLATILE_P (mem) = 1;
8291 if (target && register_operand (target, mode))
8294 ret = gen_reg_rtx (mode);
8296 emit_insn (gen_mf ());
8298 /* Special case for fetchadd instructions. */
8299   if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8301       if (mode == SImode)
8302 	insn = gen_fetchadd_acq_si (ret, mem, value);
8303       else
8304 	insn = gen_fetchadd_acq_di (ret, mem, value);
8305       emit_insn (insn);
8306       return ret;
8309 tmp = gen_reg_rtx (mode);
8310 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8311 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8312 emit_move_insn (tmp, mem);
8314   label = gen_label_rtx ();
8315   emit_label (label);
8316 emit_move_insn (ret, tmp);
8317 convert_move (ccv, tmp, /*unsignedp=*/1);
8319 /* Perform the specific operation. Special case NAND by noticing
8320 one_cmpl_optab instead. */
8321 if (binoptab == one_cmpl_optab)
8323 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8324 binoptab = and_optab;
8326 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8328   if (mode == SImode)
8329     insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8330   else
8331     insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8332   emit_insn (insn);
8334   emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8336   return ret;
8339 /* Expand op_and_fetch intrinsics.  The basic code sequence is:
8341      mf
8342      tmp = [ptr];
8343      do {
8344        old = tmp;
8345        ar.ccv = tmp;
8346        ret = tmp <op> value;
8347        cmpxchgsz.acq tmp = [ptr], ret
8348      } while (tmp != old)
8349 */
8351 static rtx
8352 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8353 tree arglist, rtx target)
8355   rtx old, label, tmp, ret, ccv, insn, mem, value;
8356   tree arg0, arg1;
8358 arg0 = TREE_VALUE (arglist);
8359 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8360 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8361 #ifdef POINTERS_EXTEND_UNSIGNED
8362 if (GET_MODE(mem) != Pmode)
8363     mem = convert_memory_address (Pmode, mem);
8364 #endif
8366 value = expand_expr (arg1, NULL_RTX, mode, 0);
8368 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8369 MEM_VOLATILE_P (mem) = 1;
8371   if (target && ! register_operand (target, mode))
8372     target = NULL_RTX;
8374 emit_insn (gen_mf ());
8375 tmp = gen_reg_rtx (mode);
8376 old = gen_reg_rtx (mode);
8377 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8378 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8380 emit_move_insn (tmp, mem);
8382   label = gen_label_rtx ();
8383   emit_label (label);
8384 emit_move_insn (old, tmp);
8385 convert_move (ccv, tmp, /*unsignedp=*/1);
8387 /* Perform the specific operation. Special case NAND by noticing
8388 one_cmpl_optab instead. */
8389 if (binoptab == one_cmpl_optab)
8391 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8392 binoptab = and_optab;
8394 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8396   if (mode == SImode)
8397     insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8398   else
8399     insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8400   emit_insn (insn);
8402   emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8404   return ret;
8407 /* Expand val_ and bool_compare_and_swap.  For val_ we want:
8409      ar.ccv = oldval
8410      mf
8411      cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8412      return ret
8414    For bool_ it's the same except return ret == oldval.
8415 */
8417 static rtx
8418 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8419 int boolp, tree arglist, rtx target)
8421 tree arg0, arg1, arg2;
8422 rtx mem, old, new, ccv, tmp, insn;
8424 arg0 = TREE_VALUE (arglist);
8425 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8426 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8427 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8428 old = expand_expr (arg1, NULL_RTX, mode, 0);
8429 new = expand_expr (arg2, NULL_RTX, mode, 0);
8431 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8432 MEM_VOLATILE_P (mem) = 1;
8434 if (GET_MODE (old) != mode)
8435 old = convert_to_mode (mode, old, /*unsignedp=*/1);
8436 if (GET_MODE (new) != mode)
8437 new = convert_to_mode (mode, new, /*unsignedp=*/1);
8439 if (! register_operand (old, mode))
8440 old = copy_to_mode_reg (mode, old);
8441 if (! register_operand (new, mode))
8442 new = copy_to_mode_reg (mode, new);
8444   if (! boolp && target && register_operand (target, mode))
8445     tmp = target;
8446   else
8447     tmp = gen_reg_rtx (mode);
8449 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8450 convert_move (ccv, old, /*unsignedp=*/1);
8451 emit_insn (gen_mf ());
8452   if (mode == SImode)
8453     insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8454   else
8455     insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8456   emit_insn (insn);
8458   if (boolp)
8460       if (! target)
8461 	target = gen_reg_rtx (rmode);
8462       return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8464   else
8465     return tmp;
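/* Editor's note: at the source level the two flavors behave like this
   (illustrative):

     old = __sync_val_compare_and_swap_si (&x, expected, newval);
	   -- returns the value the cmpxchg saw in x
     ok  = __sync_bool_compare_and_swap_si (&x, expected, newval);
	   -- returns nonzero iff the swap happened (seen == expected)  */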
8468 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8471 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8472 			       rtx target)
8474   tree arg0, arg1;
8475   rtx mem, new, ret, insn;
8477 arg0 = TREE_VALUE (arglist);
8478 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8479 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8480 new = expand_expr (arg1, NULL_RTX, mode, 0);
8482 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8483 MEM_VOLATILE_P (mem) = 1;
8484 if (! register_operand (new, mode))
8485 new = copy_to_mode_reg (mode, new);
8487   if (target && register_operand (target, mode))
8488     ret = target;
8489   else
8490     ret = gen_reg_rtx (mode);
8493 insn = gen_xchgsi (ret, mem, new);
8495 insn = gen_xchgdi (ret, mem, new);
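/* Illustrative sketch only: __sync_lock_test_and_set (&lock, 1) on a
   4-byte `lock' becomes a single

     xchg4 r8 = [r32], r33

   xchg has acquire semantics on IA-64, which is exactly the barrier
   this builtin is documented to provide; the matching release is a
   plain st.rel, expanded below.  */
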
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (enum machine_mode mode, tree arglist,
                          rtx target ATTRIBUTE_UNUSED)
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}

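/* Illustrative sketch only: __sync_lock_release (&lock) stores zero
   with release semantics, roughly

     st4.rel [r32] = r0

   The MEM created above is marked volatile, and the store patterns
   append `.rel' to stores to volatile memory (the %O print modifier
   in the move templates), which is what makes this a release.  */
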
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  /* First pass: pick the operation width (MODE) and, for the
     compare-and-swap builtins, the result width (RMODE).  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  /* Second pass: dispatch to the appropriate expander.  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
                                           target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
                                           target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}

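/* Worked example (a sketch): for __sync_bool_compare_and_swap_di the
   first switch above selects mode = DImode (the width of the memory
   operand) but rmode = SImode, because the truth-valued result is an
   `int'; the second switch then calls ia64_expand_compare_and_swap
   with boolp == 1, which materializes the comparison result into an
   SImode register via emit_store_flag_force.  */
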
/* On HP-UX IA64, aggregate parameters smaller than a word are passed
   in the most significant bits of their stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}

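/* Example (a sketch): HP-UX IA64 is big-endian, so for

     struct s3 { char c[3]; };   (int_size_in_bytes == 3 < UNITS_PER_WORD)

   the three bytes land in the most significant end of the 8-byte slot,
   i.e. the padding direction is `upward'; an aggregate of a full word
   or more falls through to DEFAULT_FUNCTION_ARG_PADDING.  */
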
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;
  tree decl;
};

static GTY(()) struct extern_func_list *extern_func_head;

static void
ia64_hpux_add_extern_decl (tree decl)
{
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->decl = decl;
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      if (!id)
        abort ();

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
        {
          const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

          TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, name);
          fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
        }
    }

  extern_func_head = 0;
}

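/* For a function `foo' that was declared extern and referenced but
   never defined in this translation unit, the loop above emits
   roughly (assuming the usual ELF definitions of TYPE_ASM_OP and
   TYPE_OPERAND_FMT):

        .global foo
        .type   foo,@function
*/
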
/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode, and we need to keep
   it for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}

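/* Example (a sketch): on a target where `long double' is the 128-bit
   quad type (TFmode), e.g. HP-UX,

     long double q (long double a, long double b) { return a / b; }

   now compiles to a call to _U_Qfdiv rather than to libgcc's default
   __divtf3; the libgcc name survives only as the XFmode entry point
   mentioned in the comment above.  */
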
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type whose address should be passed in out0, rather than
   in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}

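/* Example (C++, a sketch):

     struct S { S (const S &); ~S (); char buf[32]; };
     S f ();

   S has a non-trivial copy constructor, so the front end marks the
   type TREE_ADDRESSABLE and gives it BLKmode; with -fabi-version=2 or
   later the caller therefore passes the return slot address in out0.
   A plain POD aggregate of the same size still uses r8.  */
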
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

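/* In C-like pseudocode (a sketch of the adjustment only), the thunk
   emitted below computes

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;   (a tail call, with all other argument
                       registers left untouched)
*/
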
static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

#include "gt-ia64.h"