/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "recog.h"
#include "expr.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "sched-int.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
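/* Illustrative mapping (an assumption based on the IA-64 immediate
   formats, not spelled out in this file): a 14-bit offset fits an
   "adds" immediate, a 22-bit offset fits "addl", and a full 64-bit
   offset requires "movl".  */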
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
					 int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
				     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
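/* Illustrative uses of the attributes above (hypothetical declarations,
   not from this file):

     extern int do_syscall (int) __attribute__ ((syscall_linkage));
     static int near_var __attribute__ ((model (small)));  */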
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (rtx op, enum machine_mode mode)
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}
/* Return 1 if OP refers to a symbol in the small address area.  */

int
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return SYMBOL_REF_SMALL_ADDR_P (op);
}
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
      if (SYMBOL_REF_SMALL_ADDR_P (op))
	return 0;

    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}
/* Return 1 if OP refers to a function.  */

int
function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (rtx op, enum machine_mode mode)
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the intersection of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)));
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < 32));
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}
/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (rtx op, enum machine_mode mode)
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC);
}
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}
/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (register rtx op, enum machine_mode mode)
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! general_operand (op, mode))
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! destination_operand (op, mode))
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (rtx op, enum machine_mode mode)
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in the current situation.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  } ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl, decl);
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("%Jaddress area of '%s' conflicts with previous "
		 "declaration", decl, decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
	     decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Return 1 if exactly one of OP1 and OP2 is a base register, which makes
   the addp4 optimization applicable.  */

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
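/* Worked example (illustrative): for ROP == 0xff00 and RSHIFT == 8,
   the shift leaves op == 0xff and exact_log2 (0xff + 1) == 8, the field
   length.  A non-contiguous mask such as 0xf0f0 shifted by 4 leaves
   0xf0f, and exact_log2 (0xf10) fails because 0xf10 is not a power of
   two.  */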
/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
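  /* Worked example (illustrative): for ofs == 0x12345, the low part is
     ((0x2345 ^ 0x2000) - 0x2000) == -0x1cbb, which fits the sign-extended
     14-bit immediate field, and hi == 0x12345 - (-0x1cbb) == 0x14000;
     adding lo back onto the high-part load recovers the full offset.  */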
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
	}
      else
	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
	return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
	{
	  ia64_expand_load_address (op0, op1);
	  return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
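/* For example (an illustrative sketch, not from the original comment):
   a TImode load from (mem (reg p)) becomes DImode loads from
   (mem (post_inc p)) and, at offset 8, (mem (post_dec p)), so p is
   bumped to p+8 for the high word and back down afterward; when DEAD
   is true the final post_dec is simply dropped.  */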
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    if (reversed || dead) abort ();
	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    if (reversed || dead) abort ();
	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    if (reversed || dead) abort ();
	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
	      abort ();
	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
	      {
		/* Again the postmodify cannot be made to match, but
		   in this case it's more efficient to get rid of the
		   postmodify entirely and fix up with an add insn.  */
		out[1] = adjust_automodify_address (in, DImode, base, 8);
		fixup = gen_adddi3 (base, base,
				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
	      }
	    else
	      {
		/* Combined offset still fits in the displacement field.
		   (We cannot overflow it at the high end.)  */
		out[1] = adjust_automodify_address
		  (in, DImode,
		   gen_rtx_POST_MODIFY (Pmode, base,
					gen_rtx_PLUS (Pmode, base,
						      GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		   8);
	      }
	    break;

	  default:
	    abort ();
	  }
	break;
      }

    default:
      abort ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\
					  REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;
      if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
	abort ();
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: abort ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
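/* For instance (illustrative): a TFmode "op0 < op1" on HP-UX expands to
   a call _U_Qfcmp (op0, op1, QCMP_LT|QCMP_INV), followed by a BImode
   test of the returned integer against zero.  */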
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}
void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}
void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
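/* With the default call-used predicate registers this emits the directive
   (illustrative):

	.pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicates are preserved across calls.  */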
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
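/* For example (illustrative): if the last allocation in
   next_scratch_gr_reg returned r14, the next search starts at r15 and
   wraps around modulo 32, so successive temporaries land in distinct
   registers and can be scheduled independently.  */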
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
2011 /* Returns the number of bytes offset between the frame pointer and the stack
2012 pointer for the current function. SIZE is the number of bytes of space
2013 needed for local variables. */
2016 ia64_compute_frame_size (HOST_WIDE_INT size)
2018 HOST_WIDE_INT total_size;
2019 HOST_WIDE_INT spill_size = 0;
2020 HOST_WIDE_INT extra_spill_size = 0;
2021 HOST_WIDE_INT pretend_args_size;
2024 int spilled_gr_p = 0;
2025 int spilled_fr_p = 0;
2029 if (current_frame_info.initialized)
2032 memset (¤t_frame_info, 0, sizeof current_frame_info);
2033 CLEAR_HARD_REG_SET (mask);
2035 /* Don't allocate scratches to the return register. */
2036 diddle_return_value (mark_reg_gr_used_mask, NULL);
2038 /* Don't allocate scratches to the EH scratch registers. */
2039 if (cfun->machine->ia64_eh_epilogue_sp)
2040 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2041 if (cfun->machine->ia64_eh_epilogue_bsp)
2042 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2044 /* Find the size of the register stack frame. We have only 80 local
2045 registers, because we reserve 8 for the inputs and 8 for the
2048 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2049 since we'll be adjusting that down later. */
2050 regno = LOC_REG (78) + ! frame_pointer_needed;
2051 for (; regno >= LOC_REG (0); regno--)
2052 if (regs_ever_live[regno])
2054 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2056 /* For functions marked with the syscall_linkage attribute, we must mark
2057 all eight input registers as in use, so that locals aren't visible to
2060 if (cfun->machine->n_varargs > 0
2061 || lookup_attribute ("syscall_linkage",
2062 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2063 current_frame_info.n_input_regs = 8;
2066 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2067 if (regs_ever_live[regno])
2069 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2072 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2073 if (regs_ever_live[regno])
2075 i = regno - OUT_REG (0) + 1;
2077 /* When -p profiling, we need one output register for the mcount argument.
2078 Likewise for -a profiling for the bb_init_func argument. For -ax
2079 profiling, we need two output registers for the two bb_init_trace_func
2081 if (current_function_profile)
2083 current_frame_info.n_output_regs = i;
2085 /* ??? No rotating register support yet. */
2086 current_frame_info.n_rotate_regs = 0;
2088 /* Discover which registers need spilling, and how much room that
2089 will take. Begin with floating point and general registers,
2090 which will always wind up on the stack. */
2092 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2093 if (regs_ever_live[regno] && ! call_used_regs[regno])
2095 SET_HARD_REG_BIT (mask, regno);
2101 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2102 if (regs_ever_live[regno] && ! call_used_regs[regno])
2104 SET_HARD_REG_BIT (mask, regno);
2110 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2111 if (regs_ever_live[regno] && ! call_used_regs[regno])
2113 SET_HARD_REG_BIT (mask, regno);
2118 /* Now come all special registers that might get saved in other
2119 general registers. */
2121 if (frame_pointer_needed)
2123 current_frame_info.reg_fp = find_gr_spill (1);
2124 /* If we did not get a register, then we take LOC79. This is guaranteed
2125 to be free, even if regs_ever_live is already set, because this is
2126 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2127 as we don't count loc79 above. */
2128 if (current_frame_info.reg_fp == 0)
2130 current_frame_info.reg_fp = LOC_REG (79);
2131 current_frame_info.n_local_regs++;
2135 if (! current_function_is_leaf)
2137 /* Emit a save of BR0 if we call other functions. Do this even
2138 if this function doesn't return, as EH depends on this to be
2139 able to unwind the stack. */
2140 SET_HARD_REG_BIT (mask, BR_REG (0));
2142 current_frame_info.reg_save_b0 = find_gr_spill (1);
2143 if (current_frame_info.reg_save_b0 == 0)
2149 /* Similarly for ar.pfs. */
2150 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2151 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2152 if (current_frame_info.reg_save_ar_pfs == 0)
2154 extra_spill_size += 8;
2158 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2159 registers are clobbered, so we fall back to the stack. */
2160 current_frame_info.reg_save_gp
2161 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2162 if (current_frame_info.reg_save_gp == 0)
2164 SET_HARD_REG_BIT (mask, GR_REG (1));
2171 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2173 SET_HARD_REG_BIT (mask, BR_REG (0));
2178 if (regs_ever_live[AR_PFS_REGNUM])
2180 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2181 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2182 if (current_frame_info.reg_save_ar_pfs == 0)
2184 extra_spill_size += 8;
2190 /* Unwind descriptor hackery: things are most efficient if we allocate
2191 consecutive GR save registers for RP, PFS, FP in that order. However,
2192 it is absolutely critical that FP get the only hard register that's
2193 guaranteed to be free, so we allocate it first. If all three did
2194 happen to be allocated hard regs, and are consecutive, rearrange them
2195 into the preferred order now. */
2196 if (current_frame_info.reg_fp != 0
2197 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2198 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2200 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2201 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2202 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2205 /* See if we need to store the predicate register block. */
2206 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2207 if (regs_ever_live[regno] && ! call_used_regs[regno])
2209 if (regno <= PR_REG (63))
2211 SET_HARD_REG_BIT (mask, PR_REG (0));
2212 current_frame_info.reg_save_pr = find_gr_spill (1);
2213 if (current_frame_info.reg_save_pr == 0)
2215 extra_spill_size += 8;
2219 /* ??? Mark them all as used so that register renaming and such
2220 are free to use them. */
2221 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2222 regs_ever_live[regno] = 1;
2225 /* If we're forced to use st8.spill, we're forced to save and restore
2226 ar.unat as well. The check for existing liveness allows inline asm
2227 to touch ar.unat. */
2228 if (spilled_gr_p || cfun->machine->n_varargs
2229 || regs_ever_live[AR_UNAT_REGNUM])
2231 regs_ever_live[AR_UNAT_REGNUM] = 1;
2232 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2233 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2234 if (current_frame_info.reg_save_ar_unat == 0)
2236 extra_spill_size += 8;
2241 if (regs_ever_live[AR_LC_REGNUM])
2243 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2244 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2245 if (current_frame_info.reg_save_ar_lc == 0)
2247 extra_spill_size += 8;
2252 /* If we have an odd number of words of pretend arguments written to
2253 the stack, then the FR save area will be unaligned. We round the
2254 size of this area up to keep things 16 byte aligned. */
2256 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2258 pretend_args_size = current_function_pretend_args_size;
2260 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2261 + current_function_outgoing_args_size);
2262 total_size = IA64_STACK_ALIGN (total_size);
2264 /* We always use the 16-byte scratch area provided by the caller, but
2265 if we are a leaf function, there's no one to which we need to provide
2267 if (current_function_is_leaf)
2268 total_size = MAX (0, total_size - 16);
2270 current_frame_info.total_size = total_size;
2271 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2272 current_frame_info.spill_size = spill_size;
2273 current_frame_info.extra_spill_size = extra_spill_size;
2274 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2275 current_frame_info.n_spilled = n_spilled;
2276 current_frame_info.initialized = reload_completed;
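/* For illustration only: every size that feeds total_size above is
   rounded with IA64_STACK_ALIGN, which (assuming the usual definition
   in ia64.h) rounds up to a 16 byte multiple.  A plain-C equivalent:

     static HOST_WIDE_INT
     stack_align_16 (HOST_WIDE_INT bytes)
     {
       return (bytes + 15) & -16;
     }

   e.g. a 40 byte frame becomes 48, keeping the FR save area and the
   outgoing argument area 16 byte aligned.  */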
2279 /* Compute the initial difference between the specified pair of registers. */
2282 ia64_initial_elimination_offset (int from, int to)
2284 HOST_WIDE_INT offset;
2286 ia64_compute_frame_size (get_frame_size ());
2289 case FRAME_POINTER_REGNUM:
2290 if (to == HARD_FRAME_POINTER_REGNUM)
2292 if (current_function_is_leaf)
2293 offset = -current_frame_info.total_size;
2295 offset = -(current_frame_info.total_size
2296 - current_function_outgoing_args_size - 16);
2298 else if (to == STACK_POINTER_REGNUM)
2300 if (current_function_is_leaf)
2303 offset = 16 + current_function_outgoing_args_size;
2309 case ARG_POINTER_REGNUM:
2310 /* Arguments start above the 16 byte save area, unless stdarg
2311 in which case we store through the 16 byte save area. */
2312 if (to == HARD_FRAME_POINTER_REGNUM)
2313 offset = 16 - current_function_pretend_args_size;
2314 else if (to == STACK_POINTER_REGNUM)
2315 offset = (current_frame_info.total_size
2316 + 16 - current_function_pretend_args_size);
2328 /* If there are more than a trivial number of register spills, we use
2329 two interleaved iterators so that we can get two memory references
2332 In order to simplify things in the prologue and epilogue expanders,
2333 we use helper functions to fix up the memory references after the
2334 fact with the appropriate offsets to a POST_MODIFY memory mode.
2335 The following data structure tracks the state of the two iterators
2336 while insns are being emitted. */
2338 struct spill_fill_data
2340 rtx init_after; /* point at which to emit initializations */
2341 rtx init_reg[2]; /* initial base register */
2342 rtx iter_reg[2]; /* the iterator registers */
2343 rtx *prev_addr[2]; /* address of last memory use */
2344 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2345 HOST_WIDE_INT prev_off[2]; /* last offset */
2346 int n_iter; /* number of iterators in use */
2347 int next_iter; /* next iterator to use */
2348 unsigned int save_gr_used_mask;
2351 static struct spill_fill_data spill_fill_data;
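/* A sketch of the alternation policy (illustrative, not GCC code):
   successive spills round-robin among the n_iter iterators, so two
   back-to-back stores address memory through different base registers
   and can issue in the same instruction group:

     static int
     advance_iter (int iter, int n_iter)
     {
       return (iter + 1 >= n_iter) ? 0 : iter + 1;
     }

   spill_restore_mem below performs exactly this update on next_iter.  */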
2354 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2358 spill_fill_data.init_after = get_last_insn ();
2359 spill_fill_data.init_reg[0] = init_reg;
2360 spill_fill_data.init_reg[1] = init_reg;
2361 spill_fill_data.prev_addr[0] = NULL;
2362 spill_fill_data.prev_addr[1] = NULL;
2363 spill_fill_data.prev_insn[0] = NULL;
2364 spill_fill_data.prev_insn[1] = NULL;
2365 spill_fill_data.prev_off[0] = cfa_off;
2366 spill_fill_data.prev_off[1] = cfa_off;
2367 spill_fill_data.next_iter = 0;
2368 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2370 spill_fill_data.n_iter = 1 + (n_spills > 2);
2371 for (i = 0; i < spill_fill_data.n_iter; ++i)
2373 int regno = next_scratch_gr_reg ();
2374 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2375 current_frame_info.gr_used_mask |= 1 << regno;
2380 finish_spill_pointers (void)
2382 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2386 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2388 int iter = spill_fill_data.next_iter;
2389 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2390 rtx disp_rtx = GEN_INT (disp);
2393 if (spill_fill_data.prev_addr[iter])
2395 if (CONST_OK_FOR_N (disp))
2397 *spill_fill_data.prev_addr[iter]
2398 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2399 gen_rtx_PLUS (DImode,
2400 spill_fill_data.iter_reg[iter],
2402 REG_NOTES (spill_fill_data.prev_insn[iter])
2403 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2404 REG_NOTES (spill_fill_data.prev_insn[iter]));
2408 /* ??? Could use register post_modify for loads. */
2409 if (! CONST_OK_FOR_I (disp))
2411 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2412 emit_move_insn (tmp, disp_rtx);
2415 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2416 spill_fill_data.iter_reg[iter], disp_rtx));
2419 /* Micro-optimization: if we've created a frame pointer, it's at
2420 CFA 0, which may allow the real iterator to be initialized lower,
2421 slightly increasing parallelism. Also, if there are few saves
2422 it may eliminate the iterator entirely. */
2424 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2425 && frame_pointer_needed)
2427 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2428 set_mem_alias_set (mem, get_varargs_alias_set ());
2436 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2437 spill_fill_data.init_reg[iter]);
2442 if (! CONST_OK_FOR_I (disp))
2444 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2445 emit_move_insn (tmp, disp_rtx);
2449 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2450 spill_fill_data.init_reg[iter],
2457 /* Careful for being the first insn in a sequence. */
2458 if (spill_fill_data.init_after)
2459 insn = emit_insn_after (seq, spill_fill_data.init_after);
2462 rtx first = get_insns ();
2464 insn = emit_insn_before (seq, first);
2466 insn = emit_insn (seq);
2468 spill_fill_data.init_after = insn;
2470 /* If DISP is 0, we may or may not have a further adjustment
2471 afterward. If we do, then the load/store insn may be modified
2472 to be a post-modify. If we don't, then this copy may be
2473 eliminated by copyprop_hardreg_forward, which makes this
2474 insn garbage, which runs afoul of the sanity check in
2475 propagate_one_insn. So mark this insn as legal to delete. */
2477 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2481 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2483 /* ??? Not all of the spills are for varargs, but some of them are.
2484 The rest of the spills belong in an alias set of their own. But
2485 it doesn't actually hurt to include them here. */
2486 set_mem_alias_set (mem, get_varargs_alias_set ());
2488 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2489 spill_fill_data.prev_off[iter] = cfa_off;
2491 if (++iter >= spill_fill_data.n_iter)
2493 spill_fill_data.next_iter = iter;
2499 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2502 int iter = spill_fill_data.next_iter;
2505 mem = spill_restore_mem (reg, cfa_off);
2506 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2507 spill_fill_data.prev_insn[iter] = insn;
2514 RTX_FRAME_RELATED_P (insn) = 1;
2516 /* Don't even pretend that the unwind code can intuit its way
2517 through a pair of interleaved post_modify iterators. Just
2518 provide the correct answer. */
2520 if (frame_pointer_needed)
2522 base = hard_frame_pointer_rtx;
2527 base = stack_pointer_rtx;
2528 off = current_frame_info.total_size - cfa_off;
2532 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2533 gen_rtx_SET (VOIDmode,
2534 gen_rtx_MEM (GET_MODE (reg),
2535 plus_constant (base, off)),
2542 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2544 int iter = spill_fill_data.next_iter;
2547 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2548 GEN_INT (cfa_off)));
2549 spill_fill_data.prev_insn[iter] = insn;
2552 /* Wrapper functions that discard the CONST_INT spill offset. These
2553 exist so that we can give gr_spill/gr_fill the offset they need and
2554 use a consistent function interface. */
2557 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2559 return gen_movdi (dest, src);
2563 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2565 return gen_fr_spill (dest, src);
2569 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2571 return gen_fr_restore (dest, src);
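/* Viewed as a type, the common interface the wrappers above implement
   is (the typedef name is illustrative; the file spells the pointer
   type out longhand):

     typedef rtx (*move_fn_t) (rtx dest, rtx src, rtx offset);

   do_spill and do_restore accept any such emitter, so gen_movdi_x,
   gen_fr_spill_x and gen_fr_restore_x are interchangeable there with
   gr_spill/gr_fill, which really do consume the offset.  */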
2574 /* Called after register allocation to add any instructions needed for the
2575 prologue. Using a prologue insn is favored compared to putting all of the
2576 instructions in output_function_prologue(), since it allows the scheduler
2577 to intermix instructions with the saves of the caller saved registers. In
2578 some cases, it might be necessary to emit a barrier instruction as the last
2579 insn to prevent such scheduling.
2581 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2582 so that the debug info generation code can handle them properly.
2584 The register save area is laid out like so:
2586 [ varargs spill area ]
2587 [ fr register spill area ]
2588 [ br register spill area ]
2589 [ ar register spill area ]
2590 [ pr register spill area ]
2591 [ gr register spill area ] */
2593 /* ??? Get inefficient code when the frame size is larger than can fit in an
2594 adds instruction. */
2597 ia64_expand_prologue (void)
2599 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2600 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2603 ia64_compute_frame_size (get_frame_size ());
2604 last_scratch_gr_reg = 15;
2606 /* If there is no epilogue, then we don't need some prologue insns.
2607 We need to avoid emitting the dead prologue insns, because flow
2608 will complain about them. */
2613 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2614 if ((e->flags & EDGE_FAKE) == 0
2615 && (e->flags & EDGE_FALLTHRU) != 0)
2617 epilogue_p = (e != NULL);
2622 /* Set the local, input, and output register names. We need to do this
2623 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2624 half. If we use in/loc/out register names, then we get assembler errors
2625 in crtn.S because there is no alloc insn or regstk directive in there. */
2626 if (! TARGET_REG_NAMES)
2628 int inputs = current_frame_info.n_input_regs;
2629 int locals = current_frame_info.n_local_regs;
2630 int outputs = current_frame_info.n_output_regs;
2632 for (i = 0; i < inputs; i++)
2633 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2634 for (i = 0; i < locals; i++)
2635 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2636 for (i = 0; i < outputs; i++)
2637 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2640 /* Set the frame pointer register name. The regnum is logically loc79,
2641 but of course we'll not have allocated that many locals. Rather than
2642 worrying about renumbering the existing rtxs, we adjust the name. */
2643 /* ??? This code means that we can never use one local register when
2644 there is a frame pointer. loc79 gets wasted in this case, as it is
2645 renamed to a register that will never be used. See also the try_locals
2646 code in find_gr_spill. */
2647 if (current_frame_info.reg_fp)
2649 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2650 reg_names[HARD_FRAME_POINTER_REGNUM]
2651 = reg_names[current_frame_info.reg_fp];
2652 reg_names[current_frame_info.reg_fp] = tmp;
2655 /* We don't need an alloc instruction if we've used no outputs or locals. */
2656 if (current_frame_info.n_local_regs == 0
2657 && current_frame_info.n_output_regs == 0
2658 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2659 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2661 /* If there is no alloc, but there are input registers used, then we
2662 need a .regstk directive. */
2663 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2664 ar_pfs_save_reg = NULL_RTX;
2668 current_frame_info.need_regstk = 0;
2670 if (current_frame_info.reg_save_ar_pfs)
2671 regno = current_frame_info.reg_save_ar_pfs;
2673 regno = next_scratch_gr_reg ();
2674 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2676 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2677 GEN_INT (current_frame_info.n_input_regs),
2678 GEN_INT (current_frame_info.n_local_regs),
2679 GEN_INT (current_frame_info.n_output_regs),
2680 GEN_INT (current_frame_info.n_rotate_regs)));
2681 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2684 /* Set up frame pointer, stack pointer, and spill iterators. */
2686 n_varargs = cfun->machine->n_varargs;
2687 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2688 stack_pointer_rtx, 0);
2690 if (frame_pointer_needed)
2692 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2693 RTX_FRAME_RELATED_P (insn) = 1;
2696 if (current_frame_info.total_size != 0)
2698 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2701 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2702 offset = frame_size_rtx;
2705 regno = next_scratch_gr_reg ();
2706 offset = gen_rtx_REG (DImode, regno);
2707 emit_move_insn (offset, frame_size_rtx);
2710 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2711 stack_pointer_rtx, offset));
2713 if (! frame_pointer_needed)
2715 RTX_FRAME_RELATED_P (insn) = 1;
2716 if (GET_CODE (offset) != CONST_INT)
2719 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2720 gen_rtx_SET (VOIDmode,
2722 gen_rtx_PLUS (DImode,
2729 /* ??? At this point we must generate a magic insn that appears to
2730 modify the stack pointer, the frame pointer, and all spill
2731 iterators. This would allow the most scheduling freedom. For
2732 now, just hard stop. */
2733 emit_insn (gen_blockage ());
2736 /* Must copy out ar.unat before doing any integer spills. */
2737 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2739 if (current_frame_info.reg_save_ar_unat)
2741 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2744 alt_regno = next_scratch_gr_reg ();
2745 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2746 current_frame_info.gr_used_mask |= 1 << alt_regno;
2749 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2750 insn = emit_move_insn (ar_unat_save_reg, reg);
2751 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2753 /* Even if we're not going to generate an epilogue, we still
2754 need to save the register so that EH works. */
2755 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2756 emit_insn (gen_prologue_use (ar_unat_save_reg));
2759 ar_unat_save_reg = NULL_RTX;
2761 /* Spill all varargs registers. Do this before spilling any GR registers,
2762 since we want the UNAT bits for the GR registers to override the UNAT
2763 bits from varargs, which we don't care about. */
2766 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2768 reg = gen_rtx_REG (DImode, regno);
2769 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2772 /* Locate the bottom of the register save area. */
2773 cfa_off = (current_frame_info.spill_cfa_off
2774 + current_frame_info.spill_size
2775 + current_frame_info.extra_spill_size);
2777 /* Save the predicate register block either in a register or in memory. */
2778 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2780 reg = gen_rtx_REG (DImode, PR_REG (0));
2781 if (current_frame_info.reg_save_pr != 0)
2783 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2784 insn = emit_move_insn (alt_reg, reg);
2786 /* ??? Denote pr spill/fill by a DImode move that modifies all
2787 64 hard registers. */
2788 RTX_FRAME_RELATED_P (insn) = 1;
2790 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2791 gen_rtx_SET (VOIDmode, alt_reg, reg),
2794 /* Even if we're not going to generate an epilogue, we still
2795 need to save the register so that EH works. */
2797 emit_insn (gen_prologue_use (alt_reg));
2801 alt_regno = next_scratch_gr_reg ();
2802 alt_reg = gen_rtx_REG (DImode, alt_regno);
2803 insn = emit_move_insn (alt_reg, reg);
2804 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2809 /* Handle AR regs in numerical order. All of them get special handling. */
2810 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2811 && current_frame_info.reg_save_ar_unat == 0)
2813 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2814 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2818 /* The alloc insn already copied ar.pfs into a general register. The
2819 only thing we have to do now is copy that register to a stack slot
2820 if we'd not allocated a local register for the job. */
2821 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2822 && current_frame_info.reg_save_ar_pfs == 0)
2824 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2825 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2829 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2831 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2832 if (current_frame_info.reg_save_ar_lc != 0)
2834 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2835 insn = emit_move_insn (alt_reg, reg);
2836 RTX_FRAME_RELATED_P (insn) = 1;
2838 /* Even if we're not going to generate an epilogue, we still
2839 need to save the register so that EH works. */
2841 emit_insn (gen_prologue_use (alt_reg));
2845 alt_regno = next_scratch_gr_reg ();
2846 alt_reg = gen_rtx_REG (DImode, alt_regno);
2847 emit_move_insn (alt_reg, reg);
2848 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2853 if (current_frame_info.reg_save_gp)
2855 insn = emit_move_insn (gen_rtx_REG (DImode,
2856 current_frame_info.reg_save_gp),
2857 pic_offset_table_rtx);
2858 /* We don't know for sure yet if this is actually needed, since
2859 we've not split the PIC call patterns. If all of the calls
2860 are indirect, and not followed by any uses of the gp, then
2861 this save is dead. Allow it to go away. */
2863 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2866 /* We should now be at the base of the gr/br/fr spill area. */
2867 if (cfa_off != (current_frame_info.spill_cfa_off
2868 + current_frame_info.spill_size))
2871 /* Spill all general registers. */
2872 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2873 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2875 reg = gen_rtx_REG (DImode, regno);
2876 do_spill (gen_gr_spill, reg, cfa_off, reg);
2880 /* Handle BR0 specially -- it may be getting stored permanently in
2881 some GR register. */
2882 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2884 reg = gen_rtx_REG (DImode, BR_REG (0));
2885 if (current_frame_info.reg_save_b0 != 0)
2887 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2888 insn = emit_move_insn (alt_reg, reg);
2889 RTX_FRAME_RELATED_P (insn) = 1;
2891 /* Even if we're not going to generate an epilogue, we still
2892 need to save the register so that EH works. */
2894 emit_insn (gen_prologue_use (alt_reg));
2898 alt_regno = next_scratch_gr_reg ();
2899 alt_reg = gen_rtx_REG (DImode, alt_regno);
2900 emit_move_insn (alt_reg, reg);
2901 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2906 /* Spill the rest of the BR registers. */
2907 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2908 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2910 alt_regno = next_scratch_gr_reg ();
2911 alt_reg = gen_rtx_REG (DImode, alt_regno);
2912 reg = gen_rtx_REG (DImode, regno);
2913 emit_move_insn (alt_reg, reg);
2914 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2918 /* Align the frame and spill all FR registers. */
2919 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2920 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2924 reg = gen_rtx_REG (XFmode, regno);
2925 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2929 if (cfa_off != current_frame_info.spill_cfa_off)
2932 finish_spill_pointers ();
2935 /* Called after register allocation to add any instructions needed for the
2936 epilogue. Using an epilogue insn is favored compared to putting all of the
2937 instructions in output_function_epilogue(), since it allows the scheduler
2938 to intermix instructions with the restores of the caller saved registers. In
2939 some cases, it might be necessary to emit a barrier instruction as the last
2940 insn to prevent such scheduling. */
2943 ia64_expand_epilogue (int sibcall_p)
2945 rtx insn, reg, alt_reg, ar_unat_save_reg;
2946 int regno, alt_regno, cfa_off;
2948 ia64_compute_frame_size (get_frame_size ());
2950 /* If there is a frame pointer, then we use it instead of the stack
2951 pointer, so that the stack pointer does not need to be valid when
2952 the epilogue starts. See EXIT_IGNORE_STACK. */
2953 if (frame_pointer_needed)
2954 setup_spill_pointers (current_frame_info.n_spilled,
2955 hard_frame_pointer_rtx, 0);
2957 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2958 current_frame_info.total_size);
2960 if (current_frame_info.total_size != 0)
2962 /* ??? At this point we must generate a magic insn that appears to
2963 modify the spill iterators and the frame pointer. This would
2964 allow the most scheduling freedom. For now, just hard stop. */
2965 emit_insn (gen_blockage ());
2968 /* Locate the bottom of the register save area. */
2969 cfa_off = (current_frame_info.spill_cfa_off
2970 + current_frame_info.spill_size
2971 + current_frame_info.extra_spill_size);
2973 /* Restore the predicate registers. */
2974 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2976 if (current_frame_info.reg_save_pr != 0)
2977 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2980 alt_regno = next_scratch_gr_reg ();
2981 alt_reg = gen_rtx_REG (DImode, alt_regno);
2982 do_restore (gen_movdi_x, alt_reg, cfa_off);
2985 reg = gen_rtx_REG (DImode, PR_REG (0));
2986 emit_move_insn (reg, alt_reg);
2989 /* Restore the application registers. */
2991 /* Load the saved unat from the stack, but do not restore it until
2992 after the GRs have been restored. */
2993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2995 if (current_frame_info.reg_save_ar_unat != 0)
2997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3000 alt_regno = next_scratch_gr_reg ();
3001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3002 current_frame_info.gr_used_mask |= 1 << alt_regno;
3003 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3008 ar_unat_save_reg = NULL_RTX;
3010 if (current_frame_info.reg_save_ar_pfs != 0)
3012 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3013 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3014 emit_move_insn (reg, alt_reg);
3016 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3018 alt_regno = next_scratch_gr_reg ();
3019 alt_reg = gen_rtx_REG (DImode, alt_regno);
3020 do_restore (gen_movdi_x, alt_reg, cfa_off);
3022 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3023 emit_move_insn (reg, alt_reg);
3026 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3028 if (current_frame_info.reg_save_ar_lc != 0)
3029 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3032 alt_regno = next_scratch_gr_reg ();
3033 alt_reg = gen_rtx_REG (DImode, alt_regno);
3034 do_restore (gen_movdi_x, alt_reg, cfa_off);
3037 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3038 emit_move_insn (reg, alt_reg);
3041 /* We should now be at the base of the gr/br/fr spill area. */
3042 if (cfa_off != (current_frame_info.spill_cfa_off
3043 + current_frame_info.spill_size))
3046 /* The GP may be stored on the stack in the prologue, but it's
3047 never restored in the epilogue. Skip the stack slot. */
3048 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3051 /* Restore all general registers. */
3052 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3053 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3055 reg = gen_rtx_REG (DImode, regno);
3056 do_restore (gen_gr_restore, reg, cfa_off);
3060 /* Restore the branch registers. Handle B0 specially, as it may
3061 have gotten stored in some GR register. */
3062 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3064 if (current_frame_info.reg_save_b0 != 0)
3065 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3068 alt_regno = next_scratch_gr_reg ();
3069 alt_reg = gen_rtx_REG (DImode, alt_regno);
3070 do_restore (gen_movdi_x, alt_reg, cfa_off);
3073 reg = gen_rtx_REG (DImode, BR_REG (0));
3074 emit_move_insn (reg, alt_reg);
3077 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3078 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3080 alt_regno = next_scratch_gr_reg ();
3081 alt_reg = gen_rtx_REG (DImode, alt_regno);
3082 do_restore (gen_movdi_x, alt_reg, cfa_off);
3084 reg = gen_rtx_REG (DImode, regno);
3085 emit_move_insn (reg, alt_reg);
3088 /* Restore floating point registers. */
3089 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3090 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3094 reg = gen_rtx_REG (XFmode, regno);
3095 do_restore (gen_fr_restore_x, reg, cfa_off);
3099 /* Restore ar.unat for real. */
3100 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3102 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3103 emit_move_insn (reg, ar_unat_save_reg);
3106 if (cfa_off != current_frame_info.spill_cfa_off)
3109 finish_spill_pointers ();
3111 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3113 /* ??? At this point we must generate a magic insn that appears to
3114 modify the spill iterators, the stack pointer, and the frame
3115 pointer. This would allow the most scheduling freedom. For now,
3117 emit_insn (gen_blockage ());
3120 if (cfun->machine->ia64_eh_epilogue_sp)
3121 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3122 else if (frame_pointer_needed)
3124 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3125 RTX_FRAME_RELATED_P (insn) = 1;
3127 else if (current_frame_info.total_size)
3129 rtx offset, frame_size_rtx;
3131 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3132 if (CONST_OK_FOR_I (current_frame_info.total_size))
3133 offset = frame_size_rtx;
3136 regno = next_scratch_gr_reg ();
3137 offset = gen_rtx_REG (DImode, regno);
3138 emit_move_insn (offset, frame_size_rtx);
3141 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3144 RTX_FRAME_RELATED_P (insn) = 1;
3145 if (GET_CODE (offset) != CONST_INT)
3148 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3149 gen_rtx_SET (VOIDmode,
3151 gen_rtx_PLUS (DImode,
3158 if (cfun->machine->ia64_eh_epilogue_bsp)
3159 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3162 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3165 int fp = GR_REG (2);
3166 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3167 first available call clobbered register. If there was a frame_pointer
3168 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3169 so we have to make sure we're using the string "r2" when emitting
3170 the register name for the assembler. */
3171 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3172 fp = HARD_FRAME_POINTER_REGNUM;
3174 /* We must emit an alloc to force the input registers to become output
3175 registers. Otherwise, if the callee tries to pass its parameters
3176 through to another call without an intervening alloc, then these
3178 /* ??? We don't need to preserve all input registers. We only need to
3179 preserve those input registers used as arguments to the sibling call.
3180 It is unclear how to compute that number here. */
3181 if (current_frame_info.n_input_regs != 0)
3182 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3183 const0_rtx, const0_rtx,
3184 GEN_INT (current_frame_info.n_input_regs),
3189 /* Return 1 if br.ret can do all the work required to return from a
3193 ia64_direct_return (void)
3195 if (reload_completed && ! frame_pointer_needed)
3197 ia64_compute_frame_size (get_frame_size ());
3199 return (current_frame_info.total_size == 0
3200 && current_frame_info.n_spilled == 0
3201 && current_frame_info.reg_save_b0 == 0
3202 && current_frame_info.reg_save_pr == 0
3203 && current_frame_info.reg_save_ar_pfs == 0
3204 && current_frame_info.reg_save_ar_unat == 0
3205 && current_frame_info.reg_save_ar_lc == 0);
3210 /* Return the magic cookie that we use to hold the return address
3211 during early compilation. */
3214 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3218 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3221 /* Split this value after reload, now that we know where the return
3222 address is saved. */
3225 ia64_split_return_addr_rtx (rtx dest)
3229 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3231 if (current_frame_info.reg_save_b0 != 0)
3232 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3238 /* Compute offset from CFA for BR0. */
3239 /* ??? Must be kept in sync with ia64_expand_prologue. */
3240 off = (current_frame_info.spill_cfa_off
3241 + current_frame_info.spill_size);
3242 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3243 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3246 /* Convert CFA offset to a register based offset. */
3247 if (frame_pointer_needed)
3248 src = hard_frame_pointer_rtx;
3251 src = stack_pointer_rtx;
3252 off += current_frame_info.total_size;
3255 /* Load address into scratch register. */
3256 if (CONST_OK_FOR_I (off))
3257 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3260 emit_move_insn (dest, GEN_INT (off));
3261 emit_insn (gen_adddi3 (dest, src, dest));
3264 src = gen_rtx_MEM (Pmode, dest);
3268 src = gen_rtx_REG (DImode, BR_REG (0));
3270 emit_move_insn (dest, src);
3274 ia64_hard_regno_rename_ok (int from, int to)
3276 /* Don't clobber any of the registers we reserved for the prologue. */
3277 if (to == current_frame_info.reg_fp
3278 || to == current_frame_info.reg_save_b0
3279 || to == current_frame_info.reg_save_pr
3280 || to == current_frame_info.reg_save_ar_pfs
3281 || to == current_frame_info.reg_save_ar_unat
3282 || to == current_frame_info.reg_save_ar_lc)
3285 if (from == current_frame_info.reg_fp
3286 || from == current_frame_info.reg_save_b0
3287 || from == current_frame_info.reg_save_pr
3288 || from == current_frame_info.reg_save_ar_pfs
3289 || from == current_frame_info.reg_save_ar_unat
3290 || from == current_frame_info.reg_save_ar_lc)
3293 /* Don't use output registers outside the register frame. */
3294 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3297 /* Retain even/oddness on predicate register pairs. */
3298 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3299 return (from & 1) == (to & 1);
3304 /* Target hook for assembling integer objects. Handle word-sized
3305 aligned objects and detect the cases when @fptr is needed. */
3308 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3310 if (size == POINTER_SIZE / BITS_PER_UNIT
3312 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3313 && GET_CODE (x) == SYMBOL_REF
3314 && SYMBOL_REF_FUNCTION_P (x))
3316 if (POINTER_SIZE == 32)
3317 fputs ("\tdata4\t@fptr(", asm_out_file);
3319 fputs ("\tdata8\t@fptr(", asm_out_file);
3320 output_addr_const (asm_out_file, x);
3321 fputs (")\n", asm_out_file);
3324 return default_assemble_integer (x, size, aligned_p);
3327 /* Emit the function prologue. */
3330 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3332 int mask, grsave, grsave_prev;
3334 if (current_frame_info.need_regstk)
3335 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3336 current_frame_info.n_input_regs,
3337 current_frame_info.n_local_regs,
3338 current_frame_info.n_output_regs,
3339 current_frame_info.n_rotate_regs);
3341 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3344 /* Emit the .prologue directive. */
3347 grsave = grsave_prev = 0;
3348 if (current_frame_info.reg_save_b0 != 0)
3351 grsave = grsave_prev = current_frame_info.reg_save_b0;
3353 if (current_frame_info.reg_save_ar_pfs != 0
3354 && (grsave_prev == 0
3355 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3358 if (grsave_prev == 0)
3359 grsave = current_frame_info.reg_save_ar_pfs;
3360 grsave_prev = current_frame_info.reg_save_ar_pfs;
3362 if (current_frame_info.reg_fp != 0
3363 && (grsave_prev == 0
3364 || current_frame_info.reg_fp == grsave_prev + 1))
3367 if (grsave_prev == 0)
3368 grsave = HARD_FRAME_POINTER_REGNUM;
3369 grsave_prev = current_frame_info.reg_fp;
3371 if (current_frame_info.reg_save_pr != 0
3372 && (grsave_prev == 0
3373 || current_frame_info.reg_save_pr == grsave_prev + 1))
3376 if (grsave_prev == 0)
3377 grsave = current_frame_info.reg_save_pr;
3380 if (mask && TARGET_GNU_AS)
3381 fprintf (file, "\t.prologue %d, %d\n", mask,
3382 ia64_dbx_register_number (grsave));
3384 fputs ("\t.prologue\n", file);
3386 /* Emit a .spill directive, if necessary, to relocate the base of
3387 the register spill area. */
3388 if (current_frame_info.spill_cfa_off != -16)
3389 fprintf (file, "\t.spill %ld\n",
3390 (long) (current_frame_info.spill_cfa_off
3391 + current_frame_info.spill_size));
3394 /* Emit the .body directive at the scheduled end of the prologue. */
3397 ia64_output_function_end_prologue (FILE *file)
3399 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3402 fputs ("\t.body\n", file);
3405 /* Emit the function epilogue. */
3408 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3409 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3413 if (current_frame_info.reg_fp)
3415 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3416 reg_names[HARD_FRAME_POINTER_REGNUM]
3417 = reg_names[current_frame_info.reg_fp];
3418 reg_names[current_frame_info.reg_fp] = tmp;
3420 if (! TARGET_REG_NAMES)
3422 for (i = 0; i < current_frame_info.n_input_regs; i++)
3423 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3424 for (i = 0; i < current_frame_info.n_local_regs; i++)
3425 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3426 for (i = 0; i < current_frame_info.n_output_regs; i++)
3427 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3430 current_frame_info.initialized = 0;
3434 ia64_dbx_register_number (int regno)
3436 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3437 from its home at loc79 to something inside the register frame. We
3438 must perform the same renumbering here for the debug info. */
3439 if (current_frame_info.reg_fp)
3441 if (regno == HARD_FRAME_POINTER_REGNUM)
3442 regno = current_frame_info.reg_fp;
3443 else if (regno == current_frame_info.reg_fp)
3444 regno = HARD_FRAME_POINTER_REGNUM;
3447 if (IN_REGNO_P (regno))
3448 return 32 + regno - IN_REG (0);
3449 else if (LOC_REGNO_P (regno))
3450 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3451 else if (OUT_REGNO_P (regno))
3452 return (32 + current_frame_info.n_input_regs
3453 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3459 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3461 rtx addr_reg, eight = GEN_INT (8);
3463 /* The Intel assembler requires that the global __ia64_trampoline symbol
3464 be declared explicitly. */
3467 static bool declared_ia64_trampoline = false;
3469 if (!declared_ia64_trampoline)
3471 declared_ia64_trampoline = true;
3472 (*targetm.asm_out.globalize_label) (asm_out_file,
3473 "__ia64_trampoline");
3477 /* Load up our iterator. */
3478 addr_reg = gen_reg_rtx (Pmode);
3479 emit_move_insn (addr_reg, addr);
3481 /* The first two words are the fake descriptor:
3482 __ia64_trampoline, ADDR+16. */
3483 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3484 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3485 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3487 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3488 copy_to_reg (plus_constant (addr, 16)));
3489 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3491 /* The third word is the target descriptor. */
3492 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3493 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3495 /* The fourth word is the static chain. */
3496 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
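/* The four 8 byte words written above, viewed as a C struct (a
   descriptive aid only, not a type GCC uses):

     struct ia64_trampoline_words
     {
       void *fake_fd_entry;	/* __ia64_trampoline                 */
       void *fake_fd_gp;	/* ADDR + 16, i.e. &target_fd_entry  */
       void *target_fd_entry;	/* descriptor word for the target    */
       void *static_chain;	/* static chain value                */
     };

   The first two words form the fake function descriptor handed to
   callers; its "gp" slot points at the remaining two words.  */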
3499 /* Do any needed setup for a variadic function. CUM has not been updated
3500 for the last named argument which has type TYPE and mode MODE.
3502 We generate the actual spill instructions during prologue generation. */
3505 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3506 tree type, int * pretend_size,
3507 int second_time ATTRIBUTE_UNUSED)
3509 CUMULATIVE_ARGS next_cum = *cum;
3511 /* Skip the current argument. */
3512 ia64_function_arg_advance (&next_cum, mode, type, 1);
3514 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3516 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3517 *pretend_size = n * UNITS_PER_WORD;
3518 cfun->machine->n_varargs = n;
3522 /* Check whether TYPE is a homogeneous floating point aggregate. If
3523 it is, return the mode of the floating point type that appears
3524 in all leaves. If it is not, return VOIDmode.
3526 An aggregate is a homogeneous floating point aggregate if all
3527 fields/elements in it have the same floating point type (e.g.,
3528 SFmode). 128-bit quad-precision floats are excluded. */
3530 static enum machine_mode
3531 hfa_element_mode (tree type, int nested)
3533 enum machine_mode element_mode = VOIDmode;
3534 enum machine_mode mode;
3535 enum tree_code code = TREE_CODE (type);
3536 int know_element_mode = 0;
3541 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3542 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3543 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3544 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3548 /* Fortran complex types are supposed to be HFAs, so we need to handle
3549 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3552 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3553 && TYPE_MODE (type) != TCmode)
3554 return GET_MODE_INNER (TYPE_MODE (type));
3559 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3560 mode if this is contained within an aggregate. */
3561 if (nested && TYPE_MODE (type) != TFmode)
3562 return TYPE_MODE (type);
3567 return hfa_element_mode (TREE_TYPE (type), 1);
3571 case QUAL_UNION_TYPE:
3572 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3574 if (TREE_CODE (t) != FIELD_DECL)
3577 mode = hfa_element_mode (TREE_TYPE (t), 1);
3578 if (know_element_mode)
3580 if (mode != element_mode)
3583 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3587 know_element_mode = 1;
3588 element_mode = mode;
3591 return element_mode;
3594 /* If we reach here, we probably have some front-end specific type
3595 that the backend doesn't know about. This can happen via the
3596 aggregate_value_p call in init_function_start. All we can do is
3597 ignore unknown tree types. */
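/* Examples of the classification (illustrative types only):

     struct hfa_sf  { float x, y, z; };	     -- HFA, element mode SFmode
     struct hfa_df  { double re, im; };	     -- HFA, element mode DFmode
     struct not_hfa { float x; double y; };  -- mixed leaves: VOIDmode

   Nested aggregates and arrays are walked recursively, so
   struct { struct hfa_sf a[2]; } is still an SFmode HFA.  */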
3604 /* Return the number of words required to hold a quantity of TYPE and MODE
3605 when passed as an argument. */
3607 ia64_function_arg_words (tree type, enum machine_mode mode)
3611 if (mode == BLKmode)
3612 words = int_size_in_bytes (type);
3614 words = GET_MODE_SIZE (mode);
3616 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
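/* E.g. with 8 byte words, a 12 byte aggregate needs (12 + 8 - 1) / 8
   = 2 argument words, and a 3 byte quantity still occupies 1 word.  */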
3619 /* Return the number of registers that should be skipped so the current
3620 argument (described by TYPE and WORDS) will be properly aligned.
3622 Integer and float arguments larger than 8 bytes start at the next
3623 even boundary. Aggregates larger than 8 bytes start at the next
3624 even boundary if the aggregate has 16 byte alignment. Note that
3625 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3626 but are still to be aligned in registers.
3628 ??? The ABI does not specify how to handle aggregates with
3629 alignment from 9 to 15 bytes, or greater than 16. We handle them
3630 all as if they had 16 byte alignment. Such aggregates can occur
3631 only if gcc extensions are used. */
3633 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3635 if ((cum->words & 1) == 0)
3639 && TREE_CODE (type) != INTEGER_TYPE
3640 && TREE_CODE (type) != REAL_TYPE)
3641 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
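/* E.g. if cum->words is odd, a 16 byte aligned aggregate (or a scalar
   wider than 8 bytes) reports an offset of 1, skipping one slot so the
   value starts on an even register; with cum->words even, it is 0.  */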
3646 /* Return rtx for register where argument is passed, or zero if it is passed
3648 /* ??? 128-bit quad-precision floats are always passed in general
3652 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3653 int named, int incoming)
3655 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3656 int words = ia64_function_arg_words (type, mode);
3657 int offset = ia64_function_arg_offset (cum, type, words);
3658 enum machine_mode hfa_mode = VOIDmode;
3660 /* If all argument slots are used, then it must go on the stack. */
3661 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3664 /* Check for and handle homogeneous FP aggregates. */
3666 hfa_mode = hfa_element_mode (type, 0);
3668 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3669 and unprototyped hfas are passed specially. */
3670 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3674 int fp_regs = cum->fp_regs;
3675 int int_regs = cum->words + offset;
3676 int hfa_size = GET_MODE_SIZE (hfa_mode);
3680 /* If prototyped, pass it in FR regs then GR regs.
3681 If not prototyped, pass it in both FR and GR regs.
3683 If this is an SFmode aggregate, then it is possible to run out of
3684 FR regs while GR regs are still left. In that case, we pass the
3685 remaining part in the GR regs. */
3687 /* Fill the FP regs. We do this always. We stop if we reach the end
3688 of the argument, the last FP register, or the last argument slot. */
3690 byte_size = ((mode == BLKmode)
3691 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3692 args_byte_size = int_regs * UNITS_PER_WORD;
3694 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3695 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3697 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3698 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3702 args_byte_size += hfa_size;
3706 /* If no prototype, then the whole thing must go in GR regs. */
3707 if (! cum->prototype)
3709 /* If this is an SFmode aggregate, then we might have some left over
3710 that needs to go in GR regs. */
3711 else if (byte_size != offset)
3712 int_regs += offset / UNITS_PER_WORD;
3714 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3716 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3718 enum machine_mode gr_mode = DImode;
3719 unsigned int gr_size;
3721 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3722 then this goes in a GR reg left adjusted/little endian, right
3723 adjusted/big endian. */
3724 /* ??? Currently this is handled wrong, because 4-byte hunks are
3725 always right adjusted/little endian. */
3728 /* If we have an even 4 byte hunk because the aggregate is a
3729 multiple of 4 bytes in size, then this goes in a GR reg right
3730 adjusted/little endian. */
3731 else if (byte_size - offset == 4)
3734 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3735 gen_rtx_REG (gr_mode, (basereg
3739 gr_size = GET_MODE_SIZE (gr_mode);
3741 if (gr_size == UNITS_PER_WORD
3742 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3744 else if (gr_size > UNITS_PER_WORD)
3745 int_regs += gr_size / UNITS_PER_WORD;
3748 /* If we ended up using just one location, just return that one loc, but
3749 change the mode back to the argument mode. */
3751 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3753 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3756 /* Integral and aggregates go in general registers. If we have run out of
3757 FR registers, then FP values must also go in general registers. This can
3758 happen when we have a SFmode HFA. */
3759 else if (mode == TFmode || mode == TCmode
3760 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3762 int byte_size = ((mode == BLKmode)
3763 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3764 if (BYTES_BIG_ENDIAN
3765 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3766 && byte_size < UNITS_PER_WORD
3769 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3770 gen_rtx_REG (DImode,
3771 (basereg + cum->words
3774 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3777 return gen_rtx_REG (mode, basereg + cum->words + offset);
3781 /* If there is a prototype, then FP values go in a FR register when
3782 named, and in a GR register when unnamed. */
3783 else if (cum->prototype)
3786 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3787 /* In big-endian mode, an anonymous SFmode value must be represented
3788 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3789 the value into the high half of the general register. */
3790 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3791 return gen_rtx_PARALLEL (mode,
3793 gen_rtx_EXPR_LIST (VOIDmode,
3794 gen_rtx_REG (DImode, basereg + cum->words + offset),
3797 return gen_rtx_REG (mode, basereg + cum->words + offset);
3799 /* If there is no prototype, then FP values go in both FR and GR
3803 /* See comment above. */
3804 enum machine_mode inner_mode =
3805 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3807 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3808 gen_rtx_REG (mode, (FR_ARG_FIRST
3811 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3812 gen_rtx_REG (inner_mode,
3813 (basereg + cum->words
3817 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3821 /* Return number of words, at the beginning of the argument, that must be
3822 put in registers. 0 if the argument is entirely in registers or entirely
3826 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3827 tree type, int named ATTRIBUTE_UNUSED)
3829 int words = ia64_function_arg_words (type, mode);
3830 int offset = ia64_function_arg_offset (cum, type, words);
3832 /* If all argument slots are used, then it must go on the stack. */
3833 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3836 /* It doesn't matter whether the argument goes in FR or GR regs. If
3837 it fits within the 8 argument slots, then it goes entirely in
3838 registers. If it extends past the last argument slot, then the rest
3839 goes on the stack. */
3841 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3844 return MAX_ARGUMENT_SLOTS - cum->words - offset;
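/* Worked example: with cum->words == 6, offset == 0 and a 24 byte
   aggregate (words == 3), 3 + 6 + 0 > 8, so 8 - 6 - 0 = 2 words travel
   in registers and the remaining word goes on the stack.  */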
3847 /* Update CUM to point after this argument. This is patterned after
3848 ia64_function_arg. */
3851 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3852 tree type, int named)
3854 int words = ia64_function_arg_words (type, mode);
3855 int offset = ia64_function_arg_offset (cum, type, words);
3856 enum machine_mode hfa_mode = VOIDmode;
3858 /* If all arg slots are already full, then there is nothing to do. */
3859 if (cum->words >= MAX_ARGUMENT_SLOTS)
3862 cum->words += words + offset;
3864 /* Check for and handle homogeneous FP aggregates. */
3866 hfa_mode = hfa_element_mode (type, 0);
3868 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3869 and unprototyped hfas are passed specially. */
3870 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3872 int fp_regs = cum->fp_regs;
3873 /* This is the original value of cum->words + offset. */
3874 int int_regs = cum->words - words;
3875 int hfa_size = GET_MODE_SIZE (hfa_mode);
3879 /* If prototyped, pass it in FR regs then GR regs.
3880 If not prototyped, pass it in both FR and GR regs.
3882 If this is an SFmode aggregate, then it is possible to run out of
3883 FR regs while GR regs are still left. In that case, we pass the
3884 remaining part in the GR regs. */
3886 /* Fill the FP regs. We do this always. We stop if we reach the end
3887 of the argument, the last FP register, or the last argument slot. */
3889 byte_size = ((mode == BLKmode)
3890 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3891 args_byte_size = int_regs * UNITS_PER_WORD;
3893 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3894 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3897 args_byte_size += hfa_size;
3901 cum->fp_regs = fp_regs;
3904 /* Integral and aggregates go in general registers. If we have run out of
3905 FR registers, then FP values must also go in general registers. This can
3906 happen when we have a SFmode HFA. */
3907 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3908 cum->int_regs = cum->words;
3910 /* If there is a prototype, then FP values go in a FR register when
3911 named, and in a GR register when unnamed. */
3912 else if (cum->prototype)
3915 cum->int_regs = cum->words;
3917 /* ??? Complex types should not reach here. */
3918 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3920 /* If there is no prototype, then FP values go in both FR and GR
3924 /* ??? Complex types should not reach here. */
3925 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3926 cum->int_regs = cum->words;
3930 /* Variable sized types are passed by reference. */
3931 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3934 ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3935 enum machine_mode mode ATTRIBUTE_UNUSED,
3936 tree type, int named ATTRIBUTE_UNUSED)
3938 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3941 /* True if it is OK to do sibling call optimization for the specified
3942 call expression EXP. DECL will be the called function, or NULL if
3943 this is an indirect call. */
3945 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3947 /* We must always return with our current GP. This means we can
3948 only sibcall to functions defined in the current module. */
3949 return decl && (*targetm.binds_local_p) (decl);
3953 /* Implement va_arg. */
3956 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3958 /* Variable sized types are passed by reference. */
3959 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3961 tree ptrtype = build_pointer_type (type);
3962 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3963 return build_fold_indirect_ref (addr);
3966 /* Aggregate arguments with alignment larger than 8 bytes start at
3967 the next even boundary. Integer and floating point arguments
3968 do so if they are larger than 8 bytes, whether or not they are
3969 also aligned larger than 8 bytes. */
3970 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3971 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3973 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3974 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3975 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3976 build_int_2 (-2 * UNITS_PER_WORD, -1));
3977 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3978 gimplify_and_add (t, pre_p);
3981 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
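/* The tree built above is the usual align-up idiom; in plain C, for
   the 16 byte case used here (UNITS_PER_WORD being 8 on ia64):

     valist = (valist + 2*8 - 1) & -(2*8);

   i.e. round the argument pointer up to the next even 8 byte slot
   before the value is fetched.  */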
3984 /* Return 1 if function return value returned in memory. Return 0 if it is
3988 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3990 enum machine_mode mode;
3991 enum machine_mode hfa_mode;
3992 HOST_WIDE_INT byte_size;
3994 mode = TYPE_MODE (valtype);
3995 byte_size = GET_MODE_SIZE (mode);
3996 if (mode == BLKmode)
3998 byte_size = int_size_in_bytes (valtype);
4003 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4005 hfa_mode = hfa_element_mode (valtype, 0);
4006 if (hfa_mode != VOIDmode)
4008 int hfa_size = GET_MODE_SIZE (hfa_mode);
4010 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4015 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4021 /* Return rtx for register that holds the function return value. */
4024 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4026 enum machine_mode mode;
4027 enum machine_mode hfa_mode;
4029 mode = TYPE_MODE (valtype);
4030 hfa_mode = hfa_element_mode (valtype, 0);
4032 if (hfa_mode != VOIDmode)
4040 hfa_size = GET_MODE_SIZE (hfa_mode);
4041 byte_size = ((mode == BLKmode)
4042 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4044 for (i = 0; offset < byte_size; i++)
4046 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4047 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4053 return XEXP (loc[0], 0);
4055 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4057 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4058 return gen_rtx_REG (mode, FR_ARG_FIRST);
4061 if (BYTES_BIG_ENDIAN
4062 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4070 bytesize = int_size_in_bytes (valtype);
4071 for (i = 0; offset < bytesize; i++)
4073 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4074 gen_rtx_REG (DImode,
4077 offset += UNITS_PER_WORD;
4079 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4082 return gen_rtx_REG (mode, GR_RET_FIRST);
4086 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4087 We need to emit DTP-relative relocations. */
4090 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4094 fputs ("\tdata8.ua\t@dtprel(", file);
4095 output_addr_const (file, x);
4099 /* Print a memory address as an operand to reference that memory location. */
4101 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4102 also call this from ia64_print_operand for memory addresses. */
4105 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4106 rtx address ATTRIBUTE_UNUSED)
4110 /* Print an operand to an assembler instruction.
4111 C Swap and print a comparison operator.
4112 D Print an FP comparison operator.
4113 E Print 32 - constant, for SImode shifts as extract.
4114 e Print 64 - constant, for DImode rotates.
4115 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4116 a floating point register emitted normally.
4117 I Invert a predicate register by adding 1.
4118 J Select the proper predicate register for a condition.
4119 j Select the inverse predicate register for a condition.
4120 O Append .acq for volatile load.
4121 P Postincrement of a MEM.
4122 Q Append .rel for volatile store.
4123 S Shift amount for shladd instruction.
4124 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4125 for Intel assembler.
4126 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4127 for Intel assembler.
4128 r Print register name, or constant 0 as r0. HP compatibility for Linux kernel. */
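/* Illustrative output for a few of these codes (assumed, not verbatim):
   %O turns a volatile load into "ld8.acq", %Q turns a volatile store into
   "st8.rel", and %P prints the post-increment of a MEM, e.g. the ", 16"
   in "ld8 r8 = [r9], 16". */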
4131 ia64_print_operand (FILE * file, rtx x, int code)
4138 /* Handled below. */
4143 enum rtx_code c = swap_condition (GET_CODE (x));
4144 fputs (GET_RTX_NAME (c), file);
4149 switch (GET_CODE (x))
4161 str = GET_RTX_NAME (GET_CODE (x));
4168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4176 if (x == CONST0_RTX (GET_MODE (x)))
4177 str = reg_names [FR_REG (0)];
4178 else if (x == CONST1_RTX (GET_MODE (x)))
4179 str = reg_names [FR_REG (1)];
4180 else if (GET_CODE (x) == REG)
4181 str = reg_names [REGNO (x)];
4188 fputs (reg_names [REGNO (x) + 1], file);
4194 unsigned int regno = REGNO (XEXP (x, 0));
4195 if (GET_CODE (x) == EQ)
4199 fputs (reg_names [regno], file);
4204 if (MEM_VOLATILE_P (x))
4205 fputs(".acq", file);
4210 HOST_WIDE_INT value;
4212 switch (GET_CODE (XEXP (x, 0)))
4218 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4219 if (GET_CODE (x) == CONST_INT)
4221 else if (GET_CODE (x) == REG)
4223 fprintf (file, ", %s", reg_names[REGNO (x)]);
4231 value = GET_MODE_SIZE (GET_MODE (x));
4235 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4239 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4244 if (MEM_VOLATILE_P (x))
4245 fputs(".rel", file);
4249 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4253 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4255 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4261 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4263 const char *prefix = "0x";
4264 if (INTVAL (x) & 0x80000000)
4266 fprintf (file, "0xffffffff");
4269 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4275 /* If this operand is the constant zero, write it as register zero.
4276 Any register, zero, or CONST_INT value is OK here. */
4277 if (GET_CODE (x) == REG)
4278 fputs (reg_names[REGNO (x)], file);
4279 else if (x == CONST0_RTX (GET_MODE (x)))
4281 else if (GET_CODE (x) == CONST_INT)
4282 output_addr_const (file, x);
4284 output_operand_lossage ("invalid %%r value");
4291 /* For conditional branches, returns or calls, substitute
4292 sptk, dptk, dpnt, or spnt for %s. */
4293 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4296 int pred_val = INTVAL (XEXP (x, 0));
4298 /* Guess top and bottom 2% statically predicted. */
4299 if (pred_val < REG_BR_PROB_BASE / 50)
4301 else if (pred_val < REG_BR_PROB_BASE / 2)
4303 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4308 else if (GET_CODE (current_output_insn) == CALL_INSN)
4313 fputs (which, file);
4318 x = current_insn_predicate;
4321 unsigned int regno = REGNO (XEXP (x, 0));
4322 if (GET_CODE (x) == EQ)
4324 fprintf (file, "(%s) ", reg_names [regno]);
4329 output_operand_lossage ("ia64_print_operand: unknown code");
4333 switch (GET_CODE (x))
4335 /* This happens for the spill/restore instructions. */
4340 /* ... fall through ... */
4343 fputs (reg_names [REGNO (x)], file);
4348 rtx addr = XEXP (x, 0);
4349 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4350 addr = XEXP (addr, 0);
4351 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4356 output_addr_const (file, x);
4363 /* Compute a (partial) cost for rtx X. Return true if the complete
4364 cost has been computed, and false if subexpressions should be
4365 scanned. In either case, *TOTAL contains the cost result. */
4366 /* ??? This is incomplete. */
4369 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4377 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4380 if (CONST_OK_FOR_I (INTVAL (x)))
4382 else if (CONST_OK_FOR_J (INTVAL (x)))
4385 *total = COSTS_N_INSNS (1);
4388 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4391 *total = COSTS_N_INSNS (1);
4396 *total = COSTS_N_INSNS (1);
4402 *total = COSTS_N_INSNS (3);
4406 /* For multiplies wider than HImode, we have to go to the FPU,
4407 which normally involves copies. Plus there's the latency
4408 of the multiply itself, and the latency of the instructions to
4409 transfer integer regs to FP regs. */
4410 /* ??? Check for FP mode. */
4411 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4412 *total = COSTS_N_INSNS (10);
4414 *total = COSTS_N_INSNS (2);
4422 *total = COSTS_N_INSNS (1);
4429 /* We make divide expensive, so that divide-by-constant will be
4430 optimized to a multiply. */
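/* E.g. the middle end will then synthesize x / 3 as a multiply by a
   reciprocal constant plus shifts rather than emitting a real division;
   the exact sequence is chosen from these costs. */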
4431 *total = COSTS_N_INSNS (60);
4439 /* Calculate the cost of moving data from a register in class FROM to
4440 one in class TO, using MODE. */
4443 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4446 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4447 if (to == ADDL_REGS)
4449 if (from == ADDL_REGS)
4452 /* All costs are symmetric, so reduce cases by putting the
4453 lower-numbered class as the destination. */
4456 enum reg_class tmp = to;
4457 to = from, from = tmp;
4460 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4461 so that we get secondary memory reloads. Between FR_REGS,
4462 we have to make this at least as expensive as MEMORY_MOVE_COST
4463 to avoid spectacularly poor register class preferencing. */
4466 if (to != GR_REGS || from != GR_REGS)
4467 return MEMORY_MOVE_COST (mode, to, 0);
4475 /* Moving between PR registers takes two insns. */
4476 if (from == PR_REGS)
4478 /* Moving between PR and anything but GR is impossible. */
4479 if (from != GR_REGS)
4480 return MEMORY_MOVE_COST (mode, to, 0);
4484 /* Moving between BR and anything but GR is impossible. */
4485 if (from != GR_REGS && from != GR_AND_BR_REGS)
4486 return MEMORY_MOVE_COST (mode, to, 0);
4491 /* Moving between AR and anything but GR is impossible. */
4492 if (from != GR_REGS)
4493 return MEMORY_MOVE_COST (mode, to, 0);
4498 case GR_AND_FR_REGS:
4499 case GR_AND_BR_REGS:
4510 /* This function returns the register class required for a secondary
4511 register when copying between one of the registers in CLASS, and X,
4512 using MODE. A return value of NO_REGS means that no secondary register is required. */
4516 ia64_secondary_reload_class (enum reg_class class,
4517 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4521 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4522 regno = true_regnum (x);
4529 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4530 interaction. We end up with two pseudos with overlapping lifetimes
4531 both of which are equiv to the same constant, and both of which need
4532 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4533 changes depending on the path length, which means the qty_first_reg
4534 check in make_regs_eqv can give different answers at different times.
4535 At some point I'll probably need a reload_indi pattern to handle this.
4538 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4539 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4540 non-general registers for good measure. */
4541 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4544 /* This is needed if a pseudo used as a call_operand gets spilled to a stack slot. */
4546 if (GET_CODE (x) == MEM)
4551 /* Need to go through general registers to get to other class regs. */
4552 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4555 /* This can happen when a paradoxical subreg is an operand to the
4557 /* ??? This shouldn't be necessary after instruction scheduling is
4558 enabled, because paradoxical subregs are not accepted by
4559 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4560 stop the paradoxical subreg stupidity in the *_operand functions
4562 if (GET_CODE (x) == MEM
4563 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4564 || GET_MODE (x) == QImode))
4567 /* This can happen because of the ior/and/etc patterns that accept FP
4568 registers as operands. If the third operand is a constant, then it
4569 needs to be reloaded into a FP register. */
4570 if (GET_CODE (x) == CONST_INT)
4573 /* This can happen because of register elimination in a muldi3 insn.
4574 E.g. `26107 * (unsigned long)&u'. */
4575 if (GET_CODE (x) == PLUS)
4580 /* ??? This happens if we cse/gcse a BImode value across a call,
4581 and the function has a nonlocal goto. This is because global
4582 does not allocate call crossing pseudos to hard registers when
4583 current_function_has_nonlocal_goto is true. This is relatively
4584 common for C++ programs that use exceptions. To reproduce,
4585 return NO_REGS and compile libstdc++. */
4586 if (GET_CODE (x) == MEM)
4589 /* This can happen when we take a BImode subreg of a DImode value,
4590 and that DImode value winds up in some non-GR register. */
4591 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4603 /* Emit text to declare externally defined variables and functions, because
4604 the Intel assembler does not support undefined externals. */
4607 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4609 int save_referenced;
4611 /* GNU as does not need anything here, but the HP linker does need
4612 something for external functions. */
4616 || TREE_CODE (decl) != FUNCTION_DECL
4617 || strstr (name, "__builtin_") == name))
4620 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4621 the linker when we do this, so we need to be careful not to do this for
4622 builtin functions which have no library equivalent. Unfortunately, we
4623 can't tell here whether or not a function will actually be called by
4624 expand_expr, so we pull in library functions even if we may not need them. */
4626 if (! strcmp (name, "__builtin_next_arg")
4627 || ! strcmp (name, "alloca")
4628 || ! strcmp (name, "__builtin_constant_p")
4629 || ! strcmp (name, "__builtin_args_info"))
4633 ia64_hpux_add_extern_decl (decl);
4636 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */
4638 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4639 if (TREE_CODE (decl) == FUNCTION_DECL)
4640 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4641 (*targetm.asm_out.globalize_label) (file, name);
4642 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4646 /* Parse the -mfixed-range= option string. */
4649 fix_range (const char *const_str)
4652 char *str, *dash, *comma;
4654 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4655 REG2 are either register names or register numbers. The effect
4656 of this option is to mark the registers in the range from REG1 to
4657 REG2 as ``fixed'' so they won't be used by the compiler. This is
4658 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
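/* E.g. -mfixed-range=f32-f127 reserves the rotating FP registers; ranges
   can be combined with commas, as in the hypothetical
   -mfixed-range=f32-f127,loc0-loc15. */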
4660 i = strlen (const_str);
4661 str = (char *) alloca (i + 1);
4662 memcpy (str, const_str, i + 1);
4666 dash = strchr (str, '-');
4669 warning ("value of -mfixed-range must have form REG1-REG2");
4674 comma = strchr (dash + 1, ',');
4678 first = decode_reg_name (str);
4681 warning ("unknown register name: %s", str);
4685 last = decode_reg_name (dash + 1);
4688 warning ("unknown register name: %s", dash + 1);
4696 warning ("%s-%s is an empty range", str, dash + 1);
4700 for (i = first; i <= last; ++i)
4701 fixed_regs[i] = call_used_regs[i] = 1;
4711 static struct machine_function *
4712 ia64_init_machine_status (void)
4714 return ggc_alloc_cleared (sizeof (struct machine_function));
4717 /* Handle TARGET_OPTIONS switches. */
4720 ia64_override_options (void)
4724 const char *const name; /* processor name or nickname. */
4725 const enum processor_type processor;
4727 const processor_alias_table[] =
4729 {"itanium", PROCESSOR_ITANIUM},
4730 {"itanium1", PROCESSOR_ITANIUM},
4731 {"merced", PROCESSOR_ITANIUM},
4732 {"itanium2", PROCESSOR_ITANIUM2},
4733 {"mckinley", PROCESSOR_ITANIUM2},
4736 int const pta_size = ARRAY_SIZE (processor_alias_table);
4739 if (TARGET_AUTO_PIC)
4740 target_flags |= MASK_CONST_GP;
4742 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4744 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4745 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4747 warning ("cannot optimize floating point division for both latency and throughput");
4748 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4752 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4753 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4755 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4759 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4761 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4762 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4764 warning ("cannot optimize integer division for both latency and throughput");
4765 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4769 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4770 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4772 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4776 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4778 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4779 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4781 warning ("cannot optimize square root for both latency and throughput");
4782 target_flags &= ~MASK_INLINE_SQRT_THR;
4786 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4787 target_flags &= ~MASK_INLINE_SQRT_LAT;
4789 target_flags &= ~MASK_INLINE_SQRT_THR;
4793 if (TARGET_INLINE_SQRT_LAT)
4795 warning ("not yet implemented: latency-optimized inline square root");
4796 target_flags &= ~MASK_INLINE_SQRT_LAT;
4799 if (ia64_fixed_range_string)
4800 fix_range (ia64_fixed_range_string);
4802 if (ia64_tls_size_string)
4805 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4806 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4807 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4809 ia64_tls_size = tmp;
4812 if (!ia64_tune_string)
4813 ia64_tune_string = "itanium2";
4815 for (i = 0; i < pta_size; i++)
4816 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4818 ia64_tune = processor_alias_table[i].processor;
4823 error ("bad value (%s) for -mtune= switch", ia64_tune_string);
4825 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4826 flag_schedule_insns_after_reload = 0;
4828 /* Variable tracking should be run after all optimizations which change order
4829 of insns. It also needs a valid CFG. */
4830 ia64_flag_var_tracking = flag_var_tracking;
4831 flag_var_tracking = 0;
4833 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4835 init_machine_status = ia64_init_machine_status;
4838 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4839 static enum attr_type ia64_safe_type (rtx);
4841 static enum attr_itanium_class
4842 ia64_safe_itanium_class (rtx insn)
4844 if (recog_memoized (insn) >= 0)
4845 return get_attr_itanium_class (insn);
4847 return ITANIUM_CLASS_UNKNOWN;
4850 static enum attr_type
4851 ia64_safe_type (rtx insn)
4853 if (recog_memoized (insn) >= 0)
4854 return get_attr_type (insn);
4856 return TYPE_UNKNOWN;
4859 /* The following collection of routines emits instruction group stop bits as
4860 necessary to avoid dependencies. */
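/* For example (illustrative assembly), the RAW dependency in

      add r8 = r9, r10
      ld8 r11 = [r8]

   is only legal across a stop bit, written ";;":

      add r8 = r9, r10 ;;
      ld8 r11 = [r8]  */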
4862 /* Need to track some additional registers as far as serialization is
4863 concerned so we can properly handle br.call and br.ret. We could
4864 make these registers visible to gcc, but since these registers are
4865 never explicitly used in gcc generated code, it seems wasteful to
4866 do so (plus it would make the call and return patterns needlessly complex). */
4868 #define REG_RP (BR_REG (0))
4869 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4870 /* This is used for volatile asms which may require a stop bit immediately
4871 before and after them. */
4872 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4873 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4874 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4876 /* For each register, we keep track of how it has been written in the
4877 current instruction group.
4879 If a register is written unconditionally (no qualifying predicate),
4880 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4882 If a register is written if its qualifying predicate P is true, we
4883 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4884 may be written again by the complement of P (P^1) and when this happens,
4885 WRITE_COUNT gets set to 2.
4887 The result of this is that whenever an insn attempts to write a register
4888 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4890 If a predicate register is written by a floating-point insn, we set
4891 WRITTEN_BY_FP to true.
4893 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4894 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
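/* Example (illustrative, not verbatim compiler output):

      cmp.eq p6, p7 = r8, r9 ;;
      (p6) mov r10 = 1
      (p7) mov r10 = 2

   The two writes to r10 are under complementary predicates, so no stop bit
   is needed between them and WRITE_COUNT goes from 1 to 2; any further
   write to r10 in the same group would then need a barrier. */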
4896 struct reg_write_state
4898 unsigned int write_count : 2;
4899 unsigned int first_pred : 16;
4900 unsigned int written_by_fp : 1;
4901 unsigned int written_by_and : 1;
4902 unsigned int written_by_or : 1;
4905 /* Cumulative info for the current instruction group. */
4906 struct reg_write_state rws_sum[NUM_REGS];
4907 /* Info for the current instruction. This gets copied to rws_sum after a
4908 stop bit is emitted. */
4909 struct reg_write_state rws_insn[NUM_REGS];
4911 /* Indicates whether this is the first instruction after a stop bit,
4912 in which case we don't need another stop bit. Without this, we hit
4913 the abort in ia64_variable_issue when scheduling an alloc. */
4914 static int first_instruction;
4916 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4917 RTL for one instruction. */
4920 unsigned int is_write : 1; /* Is register being written? */
4921 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4922 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4923 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4924 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4925 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4928 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4929 static int rws_access_regno (int, struct reg_flags, int);
4930 static int rws_access_reg (rtx, struct reg_flags, int);
4931 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4932 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4933 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4934 static void init_insn_group_barriers (void);
4935 static int group_barrier_needed_p (rtx);
4936 static int safe_group_barrier_needed_p (rtx);
4938 /* Update *RWS for REGNO, which is being written by the current instruction,
4939 with predicate PRED, and associated register flags in FLAGS. */
4942 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4945 rws[regno].write_count++;
4947 rws[regno].write_count = 2;
4948 rws[regno].written_by_fp |= flags.is_fp;
4949 /* ??? Not tracking and/or across differing predicates. */
4950 rws[regno].written_by_and = flags.is_and;
4951 rws[regno].written_by_or = flags.is_or;
4952 rws[regno].first_pred = pred;
4955 /* Handle an access to register REGNO of type FLAGS using predicate register
4956 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4957 a dependency with an earlier instruction in the same group. */
4960 rws_access_regno (int regno, struct reg_flags flags, int pred)
4962 int need_barrier = 0;
4964 if (regno >= NUM_REGS)
4967 if (! PR_REGNO_P (regno))
4968 flags.is_and = flags.is_or = 0;
4974 /* One insn writes the same reg multiple times? */
4975 if (rws_insn[regno].write_count > 0)
4978 /* Update info for current instruction. */
4979 rws_update (rws_insn, regno, flags, pred);
4980 write_count = rws_sum[regno].write_count;
4982 switch (write_count)
4985 /* The register has not been written yet. */
4986 rws_update (rws_sum, regno, flags, pred);
4990 /* The register has been written via a predicate. If this is
4991 not a complementary predicate, then we need a barrier. */
4992 /* ??? This assumes that P and P+1 are always complementary
4993 predicates for P even. */
4994 if (flags.is_and && rws_sum[regno].written_by_and)
4996 else if (flags.is_or && rws_sum[regno].written_by_or)
4998 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5000 rws_update (rws_sum, regno, flags, pred);
5004 /* The register has been unconditionally written already. We need a barrier. */
5006 if (flags.is_and && rws_sum[regno].written_by_and)
5008 else if (flags.is_or && rws_sum[regno].written_by_or)
5012 rws_sum[regno].written_by_and = flags.is_and;
5013 rws_sum[regno].written_by_or = flags.is_or;
5022 if (flags.is_branch)
5024 /* Branches have several RAW exceptions that allow us to avoid barriers. */
5027 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5028 /* RAW dependencies on branch regs are permissible as long
5029 as the writer is a non-branch instruction. Since we
5030 never generate code that uses a branch register written
5031 by a branch instruction, handling this case is
5035 if (REGNO_REG_CLASS (regno) == PR_REGS
5036 && ! rws_sum[regno].written_by_fp)
5037 /* The predicates of a branch are available within the
5038 same insn group as long as the predicate was written by
5039 something other than a floating-point instruction. */
5043 if (flags.is_and && rws_sum[regno].written_by_and)
5045 if (flags.is_or && rws_sum[regno].written_by_or)
5048 switch (rws_sum[regno].write_count)
5051 /* The register has not been written yet. */
5055 /* The register has been written via a predicate. If this is
5056 not a complementary predicate, then we need a barrier. */
5057 /* ??? This assumes that P and P+1 are always complementary
5058 predicates for P even. */
5059 if ((rws_sum[regno].first_pred ^ 1) != pred)
5064 /* The register has been unconditionally written already. We need a barrier. */
5074 return need_barrier;
5078 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5080 int regno = REGNO (reg);
5081 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5084 return rws_access_regno (regno, flags, pred);
5087 int need_barrier = 0;
5089 need_barrier |= rws_access_regno (regno + n, flags, pred);
5090 return need_barrier;
5094 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5095 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5098 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
5100 rtx src = SET_SRC (x);
5104 switch (GET_CODE (src))
5110 if (SET_DEST (x) == pc_rtx)
5111 /* X is a conditional branch. */
5115 int is_complemented = 0;
5117 /* X is a conditional move. */
5118 rtx cond = XEXP (src, 0);
5119 if (GET_CODE (cond) == EQ)
5120 is_complemented = 1;
5121 cond = XEXP (cond, 0);
5122 if (GET_CODE (cond) != REG
5123 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5126 if (XEXP (src, 1) == SET_DEST (x)
5127 || XEXP (src, 2) == SET_DEST (x))
5129 /* X is a conditional move that conditionally writes the
5132 /* We need another complement in this case. */
5133 if (XEXP (src, 1) == SET_DEST (x))
5134 is_complemented = ! is_complemented;
5136 *ppred = REGNO (cond);
5137 if (is_complemented)
5141 /* ??? If this is a conditional write to the dest, then this
5142 instruction does not actually read one source. This probably
5143 doesn't matter, because that source is also the dest. */
5144 /* ??? Multiple writes to predicate registers are allowed
5145 if they are all AND type compares, or if they are all OR
5146 type compares. We do not generate such instructions currently. */
5149 /* ... fall through ... */
5152 if (COMPARISON_P (src)
5153 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5154 /* Set pflags->is_fp to 1 so that we know we're dealing
5155 with a floating point comparison when processing the
5156 destination of the SET. */
5159 /* Discover if this is a parallel comparison. We only handle
5160 and.orcm and or.andcm at present, since we must retain a
5161 strict inverse on the predicate pair. */
5162 else if (GET_CODE (src) == AND)
5164 else if (GET_CODE (src) == IOR)
5171 /* Subroutine of rtx_needs_barrier; this function determines whether the
5172 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5173 are as in rtx_needs_barrier. COND is an rtx that holds the condition for this insn. */
5177 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5179 int need_barrier = 0;
5181 rtx src = SET_SRC (x);
5183 if (GET_CODE (src) == CALL)
5184 /* We don't need to worry about the result registers that
5185 get written by a subroutine call. */
5186 return rtx_needs_barrier (src, flags, pred);
5187 else if (SET_DEST (x) == pc_rtx)
5189 /* X is a conditional branch. */
5190 /* ??? This seems redundant, as the caller sets this bit for all JUMP_INSNs. */
5192 flags.is_branch = 1;
5193 return rtx_needs_barrier (src, flags, pred);
5196 need_barrier = rtx_needs_barrier (src, flags, pred);
5198 /* This instruction unconditionally uses a predicate register. */
5200 need_barrier |= rws_access_reg (cond, flags, 0);
5203 if (GET_CODE (dst) == ZERO_EXTRACT)
5205 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5206 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5207 dst = XEXP (dst, 0);
5209 return need_barrier;
5212 /* Handle an access to rtx X of type FLAGS using predicate register
5213 PRED. Return 1 if this access creates a dependency with an earlier
5214 instruction in the same group. */
5217 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5220 int is_complemented = 0;
5221 int need_barrier = 0;
5222 const char *format_ptr;
5223 struct reg_flags new_flags;
5231 switch (GET_CODE (x))
5234 update_set_flags (x, &new_flags, &pred, &cond);
5235 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5236 if (GET_CODE (SET_SRC (x)) != CALL)
5238 new_flags.is_write = 1;
5239 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5244 new_flags.is_write = 0;
5245 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5247 /* Avoid multiple register writes, in case this is a pattern with
5248 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5249 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5251 new_flags.is_write = 1;
5252 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5253 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5254 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5259 /* X is a predicated instruction. */
5261 cond = COND_EXEC_TEST (x);
5264 need_barrier = rtx_needs_barrier (cond, flags, 0);
5266 if (GET_CODE (cond) == EQ)
5267 is_complemented = 1;
5268 cond = XEXP (cond, 0);
5269 if (GET_CODE (cond) != REG
5270 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5272 pred = REGNO (cond);
5273 if (is_complemented)
5276 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5277 return need_barrier;
5281 /* Clobber & use are for earlier compiler phases only. */
5286 /* We always emit stop bits for traditional asms. We emit stop bits
5287 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5288 if (GET_CODE (x) != ASM_OPERANDS
5289 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5291 /* Avoid writing the register multiple times if we have multiple
5292 asm outputs. This avoids an abort in rws_access_reg. */
5293 if (! rws_insn[REG_VOLATILE].write_count)
5295 new_flags.is_write = 1;
5296 rws_access_regno (REG_VOLATILE, new_flags, pred);
5301 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5302 We cannot just fall through here, since then we would be confused
5303 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5304 a traditional asm, unlike its normal usage. */
5306 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5307 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5312 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5314 rtx pat = XVECEXP (x, 0, i);
5315 if (GET_CODE (pat) == SET)
5317 update_set_flags (pat, &new_flags, &pred, &cond);
5318 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5320 else if (GET_CODE (pat) == USE
5321 || GET_CODE (pat) == CALL
5322 || GET_CODE (pat) == ASM_OPERANDS)
5323 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5324 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5327 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5329 rtx pat = XVECEXP (x, 0, i);
5330 if (GET_CODE (pat) == SET)
5332 if (GET_CODE (SET_SRC (pat)) != CALL)
5334 new_flags.is_write = 1;
5335 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5339 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5340 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5348 if (REGNO (x) == AR_UNAT_REGNUM)
5350 for (i = 0; i < 64; ++i)
5351 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5354 need_barrier = rws_access_reg (x, flags, pred);
5358 /* Find the regs used in memory address computation. */
5359 new_flags.is_write = 0;
5360 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5363 case CONST_INT: case CONST_DOUBLE:
5364 case SYMBOL_REF: case LABEL_REF: case CONST:
5367 /* Operators with side-effects. */
5368 case POST_INC: case POST_DEC:
5369 if (GET_CODE (XEXP (x, 0)) != REG)
5372 new_flags.is_write = 0;
5373 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5374 new_flags.is_write = 1;
5375 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5379 if (GET_CODE (XEXP (x, 0)) != REG)
5382 new_flags.is_write = 0;
5383 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5384 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5385 new_flags.is_write = 1;
5386 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5389 /* Handle common unary and binary ops for efficiency. */
5390 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5391 case MOD: case UDIV: case UMOD: case AND: case IOR:
5392 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5393 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5394 case NE: case EQ: case GE: case GT: case LE:
5395 case LT: case GEU: case GTU: case LEU: case LTU:
5396 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5397 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5400 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5401 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5402 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5403 case SQRT: case FFS: case POPCOUNT:
5404 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5408 switch (XINT (x, 1))
5410 case UNSPEC_LTOFF_DTPMOD:
5411 case UNSPEC_LTOFF_DTPREL:
5413 case UNSPEC_LTOFF_TPREL:
5415 case UNSPEC_PRED_REL_MUTEX:
5416 case UNSPEC_PIC_CALL:
5418 case UNSPEC_FETCHADD_ACQ:
5419 case UNSPEC_BSP_VALUE:
5420 case UNSPEC_FLUSHRS:
5421 case UNSPEC_BUNDLE_SELECTOR:
5424 case UNSPEC_GR_SPILL:
5425 case UNSPEC_GR_RESTORE:
5427 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5428 HOST_WIDE_INT bit = (offset >> 3) & 63;
5430 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5431 new_flags.is_write = (XINT (x, 1) == 1);
5432 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5437 case UNSPEC_FR_SPILL:
5438 case UNSPEC_FR_RESTORE:
5439 case UNSPEC_GETF_EXP:
5440 case UNSPEC_SETF_EXP:
5442 case UNSPEC_FR_SQRT_RECIP_APPROX:
5443 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5446 case UNSPEC_FR_RECIP_APPROX:
5447 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5448 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5451 case UNSPEC_CMPXCHG_ACQ:
5452 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5453 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5461 case UNSPEC_VOLATILE:
5462 switch (XINT (x, 1))
5465 /* Alloc must always be the first instruction of a group.
5466 We force this by always returning true. */
5467 /* ??? We might get better scheduling if we explicitly check for
5468 input/local/output register dependencies, and modify the
5469 scheduler so that alloc is always reordered to the start of
5470 the current group. We could then eliminate all of the
5471 first_instruction code. */
5472 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5474 new_flags.is_write = 1;
5475 rws_access_regno (REG_AR_CFM, new_flags, pred);
5478 case UNSPECV_SET_BSP:
5482 case UNSPECV_BLOCKAGE:
5483 case UNSPECV_INSN_GROUP_BARRIER:
5485 case UNSPECV_PSAC_ALL:
5486 case UNSPECV_PSAC_NORMAL:
5495 new_flags.is_write = 0;
5496 need_barrier = rws_access_regno (REG_RP, flags, pred);
5497 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5499 new_flags.is_write = 1;
5500 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5501 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5505 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5506 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5507 switch (format_ptr[i])
5509 case '0': /* unused field */
5510 case 'i': /* integer */
5511 case 'n': /* note */
5512 case 'w': /* wide integer */
5513 case 's': /* pointer to string */
5514 case 'S': /* optional pointer to string */
5518 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5523 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5524 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5533 return need_barrier;
5536 /* Clear out the state for group_barrier_needed_p at the start of a
5537 sequence of insns. */
5540 init_insn_group_barriers (void)
5542 memset (rws_sum, 0, sizeof (rws_sum));
5543 first_instruction = 1;
5546 /* Given the current state, recorded by previous calls to this function,
5547 determine whether a group barrier (a stop bit) is necessary before INSN.
5548 Return nonzero if so. */
5551 group_barrier_needed_p (rtx insn)
5554 int need_barrier = 0;
5555 struct reg_flags flags;
5557 memset (&flags, 0, sizeof (flags));
5558 switch (GET_CODE (insn))
5564 /* A barrier doesn't imply an instruction group boundary. */
5568 memset (rws_insn, 0, sizeof (rws_insn));
5572 flags.is_branch = 1;
5573 flags.is_sibcall = SIBLING_CALL_P (insn);
5574 memset (rws_insn, 0, sizeof (rws_insn));
5576 /* Don't bundle a call following another call. */
5577 if ((pat = prev_active_insn (insn))
5578 && GET_CODE (pat) == CALL_INSN)
5584 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5588 flags.is_branch = 1;
5590 /* Don't bundle a jump following a call. */
5591 if ((pat = prev_active_insn (insn))
5592 && GET_CODE (pat) == CALL_INSN)
5600 if (GET_CODE (PATTERN (insn)) == USE
5601 || GET_CODE (PATTERN (insn)) == CLOBBER)
5602 /* Don't care about USE and CLOBBER "insns"---those are used to
5603 indicate to the optimizer that it shouldn't get rid of
5604 certain operations. */
5607 pat = PATTERN (insn);
5609 /* Ug. Hack hacks hacked elsewhere. */
5610 switch (recog_memoized (insn))
5612 /* We play dependency tricks with the epilogue in order
5613 to get proper schedules. Undo this for dv analysis. */
5614 case CODE_FOR_epilogue_deallocate_stack:
5615 case CODE_FOR_prologue_allocate_stack:
5616 pat = XVECEXP (pat, 0, 0);
5619 /* The pattern we use for br.cloop confuses the code above.
5620 The second element of the vector is representative. */
5621 case CODE_FOR_doloop_end_internal:
5622 pat = XVECEXP (pat, 0, 1);
5625 /* Doesn't generate code. */
5626 case CODE_FOR_pred_rel_mutex:
5627 case CODE_FOR_prologue_use:
5634 memset (rws_insn, 0, sizeof (rws_insn));
5635 need_barrier = rtx_needs_barrier (pat, flags, 0);
5637 /* Check to see if the previous instruction was a volatile asm. */
5640 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5647 if (first_instruction && INSN_P (insn)
5648 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5649 && GET_CODE (PATTERN (insn)) != USE
5650 && GET_CODE (PATTERN (insn)) != CLOBBER)
5653 first_instruction = 0;
5656 return need_barrier;
5659 /* Like group_barrier_needed_p, but do not clobber the current state. */
5662 safe_group_barrier_needed_p (rtx insn)
5664 struct reg_write_state rws_saved[NUM_REGS];
5665 int saved_first_instruction;
5668 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5669 saved_first_instruction = first_instruction;
5671 t = group_barrier_needed_p (insn);
5673 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5674 first_instruction = saved_first_instruction;
5679 /* Scan the current function and insert stop bits as necessary to
5680 eliminate dependencies. This function assumes that a final
5681 instruction scheduling pass has been run which has already
5682 inserted most of the necessary stop bits. This function only
5683 inserts new ones at basic block boundaries, since these are
5684 invisible to the scheduler. */
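/* For instance, control may reach a CODE_LABEL along an edge the final
   schedule never modelled, so a conservative stop bit is emitted before a
   label whenever insns have been seen since the previous one; this is a
   sketch of the policy the loop below implements. */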
5687 emit_insn_group_barriers (FILE *dump)
5691 int insns_since_last_label = 0;
5693 init_insn_group_barriers ();
5695 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5697 if (GET_CODE (insn) == CODE_LABEL)
5699 if (insns_since_last_label)
5701 insns_since_last_label = 0;
5703 else if (GET_CODE (insn) == NOTE
5704 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5706 if (insns_since_last_label)
5708 insns_since_last_label = 0;
5710 else if (GET_CODE (insn) == INSN
5711 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5712 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5714 init_insn_group_barriers ();
5717 else if (INSN_P (insn))
5719 insns_since_last_label = 1;
5721 if (group_barrier_needed_p (insn))
5726 fprintf (dump, "Emitting stop before label %d\n",
5727 INSN_UID (last_label));
5728 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5731 init_insn_group_barriers ();
5739 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5740 This function has to emit all necessary group barriers. */
5743 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5747 init_insn_group_barriers ();
5749 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5751 if (GET_CODE (insn) == BARRIER)
5753 rtx last = prev_active_insn (insn);
5757 if (GET_CODE (last) == JUMP_INSN
5758 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5759 last = prev_active_insn (last);
5760 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5761 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5763 init_insn_group_barriers ();
5765 else if (INSN_P (insn))
5767 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5768 init_insn_group_barriers ();
5769 else if (group_barrier_needed_p (insn))
5771 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5772 init_insn_group_barriers ();
5773 group_barrier_needed_p (insn);
5780 static int errata_find_address_regs (rtx *, void *);
5781 static void errata_emit_nops (rtx);
5782 static void fixup_errata (void);
5784 /* This structure is used to track some details about the previous insn
5785 groups so we can determine if it may be necessary to insert NOPs to
5786 work around hardware errata. */
5789 HARD_REG_SET p_reg_set;
5790 HARD_REG_SET gr_reg_conditionally_set;
5793 /* Index into the last_group array. */
5794 static int group_idx;
5796 /* Called through for_each_rtx; determines if a hard register that was
5797 conditionally set in the previous group is used as an address register.
5798 It ensures that for_each_rtx returns 1 in that case. */
5800 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5803 if (GET_CODE (x) != MEM)
5806 if (GET_CODE (x) == POST_MODIFY)
5808 if (GET_CODE (x) == REG)
5810 struct group *prev_group = last_group + (group_idx ^ 1);
5811 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5819 /* Called for each insn; this function keeps track of the state in
5820 last_group and emits additional NOPs if necessary to work around
5821 an Itanium A/B step erratum. */
5823 errata_emit_nops (rtx insn)
5825 struct group *this_group = last_group + group_idx;
5826 struct group *prev_group = last_group + (group_idx ^ 1);
5827 rtx pat = PATTERN (insn);
5828 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5829 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5830 enum attr_type type;
5833 if (GET_CODE (real_pat) == USE
5834 || GET_CODE (real_pat) == CLOBBER
5835 || GET_CODE (real_pat) == ASM_INPUT
5836 || GET_CODE (real_pat) == ADDR_VEC
5837 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5838 || asm_noperands (PATTERN (insn)) >= 0)
5841 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5844 if (GET_CODE (set) == PARALLEL)
5847 set = XVECEXP (real_pat, 0, 0);
5848 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5849 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5850 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5857 if (set && GET_CODE (set) != SET)
5860 type = get_attr_type (insn);
5863 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5864 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5866 if ((type == TYPE_M || type == TYPE_A) && cond && set
5867 && REG_P (SET_DEST (set))
5868 && GET_CODE (SET_SRC (set)) != PLUS
5869 && GET_CODE (SET_SRC (set)) != MINUS
5870 && (GET_CODE (SET_SRC (set)) != ASHIFT
5871 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5872 && (GET_CODE (SET_SRC (set)) != MEM
5873 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5874 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5876 if (!COMPARISON_P (cond)
5877 || !REG_P (XEXP (cond, 0)))
5880 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5881 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5883 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5885 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5886 emit_insn_before (gen_nop (), insn);
5887 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5889 memset (last_group, 0, sizeof last_group);
5893 /* Emit extra nops if they are required to work around hardware errata. */
5900 if (! TARGET_B_STEP)
5904 memset (last_group, 0, sizeof last_group);
5906 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5911 if (ia64_safe_type (insn) == TYPE_S)
5914 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5917 errata_emit_nops (insn);
5922 /* Instruction scheduling support. */
5924 #define NR_BUNDLES 10
5926 /* A list of names of all available bundles. */
5928 static const char *bundle_name [NR_BUNDLES] =
5934 #if NR_BUNDLES == 10
5944 /* Nonzero if we should insert stop bits into the schedule. */
5946 int ia64_final_schedule = 0;
5948 /* Codes of the corresponding queried units: */
5950 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5951 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5953 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5954 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5956 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5958 /* The following variable value is an insn group barrier. */
5960 static rtx dfa_stop_insn;
5962 /* The following variable value is the last issued insn. */
5964 static rtx last_scheduled_insn;
5966 /* The following variable value is the size of the DFA state. */
5968 static size_t dfa_state_size;
5970 /* The following variable value is a pointer to a DFA state used as
5971 a temporary variable. */
5973 static state_t temp_dfa_state = NULL;
5975 /* The following variable value is the DFA state after issuing the last insn. */
5978 static state_t prev_cycle_state = NULL;
5980 /* The following array element values are TRUE if the corresponding
5981 insn requires stop bits to be added before it. */
5983 static char *stops_p;
5985 /* The following variable is used to set up the array mentioned above. */
5987 static int stop_before_p = 0;
5989 /* The following variable value is the length of the arrays `clocks' and `add_cycles'. */
5992 static int clocks_length;
5994 /* The following array element values are cycles on which the
5995 corresponding insn will be issued. The array is used only for Itanium1. */
6000 /* The following array element values are numbers of cycles that should
6001 be added to improve insn scheduling for MM_insns for Itanium1. */
6003 static int *add_cycles;
6005 static rtx ia64_single_set (rtx);
6006 static void ia64_emit_insn_before (rtx, rtx);
6008 /* Map a bundle number to its pseudo-op. */
6011 get_bundle_name (int b)
6013 return bundle_name[b];
6017 /* Return the maximum number of instructions a cpu can issue. */
6020 ia64_issue_rate (void)
6025 /* Helper function - like single_set, but look inside COND_EXEC. */
6028 ia64_single_set (rtx insn)
6030 rtx x = PATTERN (insn), ret;
6031 if (GET_CODE (x) == COND_EXEC)
6032 x = COND_EXEC_CODE (x);
6033 if (GET_CODE (x) == SET)
6036 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6037 Although they are not a classical single set, the second set is there just
6038 to protect it from moving past FP-relative stack accesses. */
6039 switch (recog_memoized (insn))
6041 case CODE_FOR_prologue_allocate_stack:
6042 case CODE_FOR_epilogue_deallocate_stack:
6043 ret = XVECEXP (x, 0, 0);
6047 ret = single_set_2 (insn, x);
6054 /* Adjust the cost of a scheduling dependency. Return the new cost of
6055 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6058 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6060 enum attr_itanium_class dep_class;
6061 enum attr_itanium_class insn_class;
6063 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6066 insn_class = ia64_safe_itanium_class (insn);
6067 dep_class = ia64_safe_itanium_class (dep_insn);
6068 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6069 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6075 /* Like emit_insn_before, but skip cycle_display notes.
6076 ??? When cycle display notes are implemented, update this. */
6079 ia64_emit_insn_before (rtx insn, rtx before)
6081 emit_insn_before (insn, before);
6084 /* The following function marks insns that produce addresses for load
6085 and store insns. Such insns will be placed into M slots because that
6086 decreases the latency on Itanium1 (see function
6087 `ia64_produce_address_p' and the DFA descriptions). */
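/* E.g. when an IALU insn computes r14 and a later "ld8 r15 = [r14]"
   consumes it, marking the producer encourages placing it in an M slot so
   that the Itanium1 address bypass applies (illustrative; the precise
   bypass conditions live in the DFA descriptions). */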
6090 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6092 rtx insn, link, next, next_tail;
6094 next_tail = NEXT_INSN (tail);
6095 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6098 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6100 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6102 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6104 next = XEXP (link, 0);
6105 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6106 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6107 && ia64_st_address_bypass_p (insn, next))
6109 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6110 || ia64_safe_itanium_class (next)
6111 == ITANIUM_CLASS_FLD)
6112 && ia64_ld_address_bypass_p (insn, next))
6115 insn->call = link != 0;
6119 /* We're beginning a new block. Initialize data structures as necessary. */
6122 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6123 int sched_verbose ATTRIBUTE_UNUSED,
6124 int max_ready ATTRIBUTE_UNUSED)
6126 #ifdef ENABLE_CHECKING
6129 if (reload_completed)
6130 for (insn = NEXT_INSN (current_sched_info->prev_head);
6131 insn != current_sched_info->next_tail;
6132 insn = NEXT_INSN (insn))
6133 if (SCHED_GROUP_P (insn))
6136 last_scheduled_insn = NULL_RTX;
6137 init_insn_group_barriers ();
6140 /* We are about to begin issuing insns for this clock cycle.
6141 Override the default sort algorithm to better slot instructions. */
6144 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6145 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6149 int n_ready = *pn_ready;
6150 rtx *e_ready = ready + n_ready;
6154 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6156 if (reorder_type == 0)
6158 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6160 for (insnp = ready; insnp < e_ready; insnp++)
6161 if (insnp < e_ready)
6164 enum attr_type t = ia64_safe_type (insn);
6165 if (t == TYPE_UNKNOWN)
6167 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6168 || asm_noperands (PATTERN (insn)) >= 0)
6170 rtx lowest = ready[n_asms];
6171 ready[n_asms] = insn;
6177 rtx highest = ready[n_ready - 1];
6178 ready[n_ready - 1] = insn;
6185 if (n_asms < n_ready)
6187 /* Some normal insns to process. Skip the asms. */
6191 else if (n_ready > 0)
6195 if (ia64_final_schedule)
6198 int nr_need_stop = 0;
6200 for (insnp = ready; insnp < e_ready; insnp++)
6201 if (safe_group_barrier_needed_p (*insnp))
6204 if (reorder_type == 1 && n_ready == nr_need_stop)
6206 if (reorder_type == 0)
6209 /* Move down everything that needs a stop bit, preserving relative order. */
6211 while (insnp-- > ready + deleted)
6212 while (insnp >= ready + deleted)
6215 if (! safe_group_barrier_needed_p (insn))
6217 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6228 /* We are about to begin issuing insns for this clock cycle. Override
6229 the default sort algorithm to better slot instructions. */
6232 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6235 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6236 pn_ready, clock_var, 0);
6239 /* Like ia64_sched_reorder, but called after issuing each insn.
6240 Override the default sort algorithm to better slot instructions. */
6243 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6244 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6245 int *pn_ready, int clock_var)
6247 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6248 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6249 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6253 /* We are about to issue INSN. Return the number of insns left on the
6254 ready queue that can be issued this cycle. */
6257 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6258 int sched_verbose ATTRIBUTE_UNUSED,
6259 rtx insn ATTRIBUTE_UNUSED,
6260 int can_issue_more ATTRIBUTE_UNUSED)
6262 last_scheduled_insn = insn;
6263 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6264 if (reload_completed)
6266 if (group_barrier_needed_p (insn))
6268 if (GET_CODE (insn) == CALL_INSN)
6269 init_insn_group_barriers ();
6270 stops_p [INSN_UID (insn)] = stop_before_p;
6276 /* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
6280 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6282 if (insn == NULL_RTX || !INSN_P (insn))
6284 return (!reload_completed
6285 || !safe_group_barrier_needed_p (insn));
6288 /* The following variable value is a pseudo-insn used by the DFA insn
6289 scheduler to change the DFA state when the simulated clock is increased. */
6292 static rtx dfa_pre_cycle_insn;
6294 /* We are about to begin issuing INSN. Return nonzero if we cannot
6295 issue it on the given cycle CLOCK, and return zero if we should not sort
6296 the ready queue on the next clock start. */
6299 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6300 int clock, int *sort_p)
6302 int setup_clocks_p = FALSE;
6304 if (insn == NULL_RTX || !INSN_P (insn))
6306 if ((reload_completed && safe_group_barrier_needed_p (insn))
6307 || (last_scheduled_insn
6308 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6309 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6310 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6312 init_insn_group_barriers ();
6313 if (verbose && dump)
6314 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6315 last_clock == clock ? " + cycle advance" : "");
6317 if (last_clock == clock)
6319 state_transition (curr_state, dfa_stop_insn);
6320 if (TARGET_EARLY_STOP_BITS)
6321 *sort_p = (last_scheduled_insn == NULL_RTX
6322 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6327 else if (reload_completed)
6328 setup_clocks_p = TRUE;
6329 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6330 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6331 state_reset (curr_state);
6334 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6335 state_transition (curr_state, dfa_stop_insn);
6336 state_transition (curr_state, dfa_pre_cycle_insn);
6337 state_transition (curr_state, NULL);
6340 else if (reload_completed)
6341 setup_clocks_p = TRUE;
6342 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6343 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6344 && asm_noperands (PATTERN (insn)) < 0)
6346 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6348 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6353 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6354 if (REG_NOTE_KIND (link) == 0)
6356 enum attr_itanium_class dep_class;
6357 rtx dep_insn = XEXP (link, 0);
6359 dep_class = ia64_safe_itanium_class (dep_insn);
6360 if ((dep_class == ITANIUM_CLASS_MMMUL
6361 || dep_class == ITANIUM_CLASS_MMSHF)
6362 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6364 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6365 d = last_clock - clocks [INSN_UID (dep_insn)];
6368 add_cycles [INSN_UID (insn)] = 3 - d;
6376 /* The following page contains abstract data `bundle states' which are
6377 used for bundling insns (inserting nops and template generation). */
6379 /* The following describes the state of insn bundling. */
6383 /* Unique bundle state number to identify them in the debugging output. */
6386 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6387 /* number of nops before and after the insn */
6388 short before_nops_num, after_nops_num;
6389 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6391 int cost; /* cost of the state in cycles */
6392 int accumulated_insns_num; /* number of all previous insns including
6393 nops. L is considered as 2 insns */
6394 int branch_deviation; /* deviation of previous branches from 3rd slots */
6395 struct bundle_state *next; /* next state with the same insn_num */
6396 struct bundle_state *originator; /* originator (previous insn state) */
6397 /* All bundle states are in the following chain. */
6398 struct bundle_state *allocated_states_chain;
6399 /* The DFA State after issuing the insn and the nops. */
6403 /* The following maps an insn number to the corresponding bundle state. */
6405 static struct bundle_state **index_to_bundle_states;
6407 /* The unique number of the next bundle state. */
6409 static int bundle_states_num;
6411 /* All allocated bundle states are in the following chain. */
6413 static struct bundle_state *allocated_bundle_states_chain;
6415 /* All allocated but not used bundle states are in the following chain. */
6418 static struct bundle_state *free_bundle_state_chain;
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
      result = xmalloc (sizeof (struct bundle_state));
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
  result->unique_num = bundle_states_num++;

/* The following function frees the given bundle state.  */

free_bundle_state (struct bundle_state *state)
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;

/* Start work with abstract data `bundle states'.  */

initiate_bundle_states (void)
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;

/* Finish work with abstract data `bundle states'.  */

finish_bundle_states (void)
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state = next_state)
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);

/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;
/* The function returns hash of BUNDLE_STATE.  */

bundle_state_hash (const void *bundle_state)
  const struct bundle_state *state = (struct bundle_state *) bundle_state;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;

/* The function returns nonzero if the bundle state keys are equal.  */

bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
  const struct bundle_state *state1 = (struct bundle_state *) bundle_state_1;
  const struct bundle_state *state2 = (struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);

/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with the given key.  */
insert_bundle_state (struct bundle_state *bundle_state)
  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
  if (*entry_ptr == NULL)
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
	       && (((struct bundle_state *) *entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || (((struct bundle_state *) *entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((struct bundle_state *) *entry_ptr)->branch_deviation
			  > bundle_state->branch_deviation))))
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;

/* Start work with the hash table.  */

initiate_bundle_state_table (void)
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,

/* Finish work with the hash table.  */

finish_bundle_state_table (void)
  htab_delete (bundle_state_table);
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

try_issue_nops (struct bundle_state *curr_state, int nops_num)
  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
	free_bundle_state (curr_state);

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

try_issue_insn (struct bundle_state *curr_state, rtx insn)
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
      free_bundle_state (curr_state);

/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  if (insn == NULL_RTX)
  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
      if (GET_MODE (insn) == TImode)
	  if (!try_issue_nops (curr_state, before_nops_num))
	  if (!try_issue_insn (curr_state, insn))
	  memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
	  if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	      && curr_state->accumulated_insns_num % 3 != 0)
	      free_bundle_state (curr_state);
  else if (GET_MODE (insn) != TImode)
      if (!try_issue_nops (curr_state, before_nops_num))
      if (!try_issue_insn (curr_state, insn))
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      if (!try_issue_nops (curr_state, before_nops_num))
      if (!try_issue_insn (curr_state, insn))
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	  /* Finish the bundle containing the asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
	  curr_state = curr_state1;
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
/* The following function returns the position in the two-bundle window
   for the given STATE.  */

get_max_pos (state_t state)
  if (cpu_unit_reservation_p (state, pos_6))
  else if (cpu_unit_reservation_p (state, pos_5))
  else if (cpu_unit_reservation_p (state, pos_4))
  else if (cpu_unit_reservation_p (state, pos_3))
  else if (cpu_unit_reservation_p (state, pos_2))
  else if (cpu_unit_reservation_p (state, pos_1))

/* The function returns the code of a possible template for the given
   position and state.  It should be called only with position values
   of 3 or 6.  */

get_template (state_t state, int pos)
      if (cpu_unit_reservation_p (state, _0mii_))
      else if (cpu_unit_reservation_p (state, _0mmi_))
      else if (cpu_unit_reservation_p (state, _0mfi_))
      else if (cpu_unit_reservation_p (state, _0mmf_))
      else if (cpu_unit_reservation_p (state, _0bbb_))
      else if (cpu_unit_reservation_p (state, _0mbb_))
      else if (cpu_unit_reservation_p (state, _0mib_))
      else if (cpu_unit_reservation_p (state, _0mmb_))
      else if (cpu_unit_reservation_p (state, _0mfb_))
      else if (cpu_unit_reservation_p (state, _0mlx_))
      if (cpu_unit_reservation_p (state, _1mii_))
      else if (cpu_unit_reservation_p (state, _1mmi_))
      else if (cpu_unit_reservation_p (state, _1mfi_))
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
      else if (cpu_unit_reservation_p (state, _1bbb_))
      else if (cpu_unit_reservation_p (state, _1mbb_))
      else if (cpu_unit_reservation_p (state, _1mib_))
      else if (cpu_unit_reservation_p (state, _1mmb_))
      else if (cpu_unit_reservation_p (state, _1mfb_))
      else if (cpu_unit_reservation_p (state, _1mlx_))
/* The following function returns the first insn important for insn
   bundling, starting at INSN and ending before TAIL.  */

get_next_important_insn (rtx insn, rtx tail)
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER)
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops, and implicitly about previous decisions) is
   described by a structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn of the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  There is an additional forward pass through the EBB for
   the Itanium1 processor.  This pass inserts more nops to make the
   dependency between a producer insn and an MMMUL/MMSHF insn at least
   4 cycles long.  */
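/* A minimal illustrative sketch (deliberately compiled out): the core
   of the forward pass of the dynamic programming described above,
   written as a stand-alone loop.  The helper and variable names match
   the real ones in this file, but the wrapper function itself is
   hypothetical.  */
#if 0
static void
expand_bundle_states_for_insn (rtx insn, int insn_num)
{
  struct bundle_state *curr_state, *next_state;
  int nops;

  /* Expand every surviving state generated for the previous insn by
     issuing 0, 1 or 2 nops before INSN.  issue_nops_and_insn calls
     insert_bundle_state, which keeps only the cheapest state for each
     (dfa_state, insn_num) key -- this memoization is what makes the
     search tractable.  */
  for (curr_state = index_to_bundle_states [insn_num - 1];
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->next;
      for (nops = 0; nops <= 2; nops++)
	issue_nops_and_insn (curr_state, nops, insn,
			     /*try_bundle_end_p=*/0,
			     /*only_bundle_end_p=*/0);
    }
}
#endif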
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx insn, next_insn;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  enum attr_type type;

  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = xmalloc ((insn_num + 2)
				    * sizeof (struct bundle_state *));
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;

  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn = NEXT_INSN (insn))
	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
	    || GET_CODE (PATTERN (insn)) == USE
	    || GET_CODE (PATTERN (insn)) == CLOBBER)
	&& GET_MODE (insn) == TImode)
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn = NEXT_INSN (next_insn))
	  if (INSN_P (next_insn)
	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
	      && GET_CODE (PATTERN (next_insn)) != USE
	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
	      PUT_MODE (next_insn, TImode);
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
	|| ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
	|| GET_CODE (PATTERN (insn)) == USE
	|| GET_CODE (PATTERN (insn)) == CLOBBER)
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state = next_state)
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      /* We need to insert 2 nops for cases like M_MII.  To
		 guarantee issuing all insns on the same cycle for
		 Itanium 1, we need to issue 2 nops after the first M
		 insn (MnnMII where n is a nop insn).  */
	      || ((type == TYPE_M || type == TYPE_A)
		  && ia64_tune == PROCESSOR_ITANIUM
		  && !bundle_end_p && pos == 1))
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
      if (index_to_bundle_states [insn_num] == NULL)
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	    /* This structure is taken from the generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;

	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       (ia64_tune == PROCESSOR_ITANIUM
		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
  if (index_to_bundle_states [insn_num] == NULL)
    /* We should find a solution because the 2nd insn scheduling has
       found one.  */
  /* Find a state corresponding to the best insn sequence.  */
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state = curr_state->next)
    /* We are just looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       those with minimal numbers of inserted nops, and finally those
       with branch insns placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& curr_state->branch_deviation
			   < best_state->branch_deviation)))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
      insn = curr_state->insn;
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
	       || asm_noperands (PATTERN (insn)) >= 0);
      if (verbose >= 2 && dump)
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;

	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     (ia64_tune == PROCESSOR_ITANIUM
	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
	  /* The following (a negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	  /* We are at the end of the window -- find template(s) for
	    template0 = get_template (curr_state->dfa_state, 3);
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have a stop inside a bundle.  */
	  template1 = get_template (curr_state->dfa_state, 3);
      /* Emit nops after the current insn.  */
      for (i = 0; i < curr_state->after_nops_num; i++)
	  emit_insn_after (nop, insn);
	      /* We are at the start of a bundle: emit the template
		 (it should be defined).  */
	      b = gen_bundle_selector (GEN_INT (template0));
	      ia64_emit_insn_before (b, nop);
	      /* If we have a two-bundle window, we make one bundle
		 rotation.  Otherwise template0 will be undefined
		 (a negative value).  */
	      template0 = template1;
      /* Move the position backward in the window.  A group barrier has
	 no slot.  An asm insn takes up a whole bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
	  && asm_noperands (PATTERN (insn)) < 0)
      /* A long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
	  && asm_noperands (PATTERN (insn)) < 0)
	  /* The current insn is at the bundle start: emit the
	      b = gen_bundle_selector (GEN_INT (template0));
	      ia64_emit_insn_before (b, insn);
	      b = PREV_INSN (insn);
	  /* See the comment above in the analogous place for emitting nops
	  template0 = template1;
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	  ia64_emit_insn_before (nop, insn);
	  nop = PREV_INSN (insn);
	      /* See the comment above in the analogous place for emitting nops
	      b = gen_bundle_selector (GEN_INT (template0));
	      ia64_emit_insn_before (b, insn);
	      b = PREV_INSN (insn);
	      template0 = template1;
  if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium1 has a strange design: if the distance between an insn
       and a dependent MM-insn is less than 4, we incur an additional
       6-cycle stall.  So we make the distance equal to 4 cycles if it
    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
	  || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
	  || GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
	  /* We found an MM-insn which needs additional cycles.  */
	    /* Now we are searching for the template of the bundle in
	       which the MM-insn is placed and for the position of the
	       insn in the bundle (0, 1, 2).  We also check whether
	       there is a stop before the insn.  */
	    last = prev_active_insn (insn);
	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
	      last = prev_active_insn (last);
	    for (;; last = prev_active_insn (last))
	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
		    /* The insn is in an MLX bundle.  Change the template
		       to MFI because we will add nops before the
		       insn.  It simplifies subsequent code a lot.  */
		      = gen_bundle_selector (const2_rtx); /* -> MFI */
	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    /* Some correctness checks: the stop is not at the bundle
	       start, there are no more than 3 insns in the bundle,
	       and the MM-insn is not at the start of a bundle with
	    if ((pred_stop_p && n == 0) || n > 2
		|| (template0 == 9 && n != 0))
	    /* Put nops after the insn in the bundle.  */
	    for (j = 3 - n; j > 0; j --)
	      ia64_emit_insn_before (gen_nop (), insn);
	    /* This takes into account that we will add N more nops
	       before the insn later on -- please see the code below.  */
	    add_cycles [INSN_UID (insn)]--;
	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
	      add_cycles [INSN_UID (insn)]--;
	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
		/* Insert the "MII;" template.  */
		ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
		ia64_emit_insn_before (gen_nop (), insn);
		ia64_emit_insn_before (gen_nop (), insn);
		    /* To decrease code size, we use "MI;I;"
		    ia64_emit_insn_before
		      (gen_insn_group_barrier (GEN_INT (3)), insn);
		ia64_emit_insn_before (gen_nop (), insn);
		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
	    /* Put the MM-insn in the same slot of a bundle with the
	       same template as the original one.  */
	    ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
	    /* To put the insn in the same slot, add the necessary number
	    for (j = n; j > 0; j --)
	      ia64_emit_insn_before (gen_nop (), insn);
	    /* Put the stop there if the original bundle had it.  */
	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

ia64_sched_finish (FILE *dump, int sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
  if (reload_completed)
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
/* The following function inserts stop bits in a scheduled BB or EBB.  */

final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
  int need_barrier_p = 0;
  rtx prev_insn = NULL_RTX;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == BARRIER)
	  rtx last = prev_active_insn (insn);

	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  prev_insn = NULL_RTX;
      else if (INSN_P (insn))
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	      init_insn_group_barriers ();
	      prev_insn = NULL_RTX;
	  else if (need_barrier_p || group_barrier_needed_p (insn))
	      if (TARGET_EARLY_STOP_BITS)
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		  if (last == current_sched_info->prev_head)
		  last = prev_active_insn (last);
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last = NEXT_INSN (last))
		      group_barrier_needed_p (last);
		emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
		init_insn_group_barriers ();
	      group_barrier_needed_p (insn);
	      prev_insn = NULL_RTX;
	  else if (recog_memoized (insn) >= 0)
	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
			    || asm_noperands (PATTERN (insn)) >= 0);
/* If the following function returns TRUE, we will use the DFA

ia64_first_cycle_multipass_dfa_lookahead (void)
  return (reload_completed ? 6 : 4);

/* The following function initializes the variable `dfa_pre_cycle_insn'.  */

ia64_init_dfa_pre_cycle_insn (void)
  if (temp_dfa_state == NULL)
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

ia64_dfa_pre_cycle_insn (void)
  return dfa_pre_cycle_insn;
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

ia64_st_address_bypass_p (rtx producer, rtx consumer)
  if (producer == NULL_RTX || consumer == NULL_RTX)
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  dest = ia64_single_set (consumer);
  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
      || GET_CODE (mem) != MEM)
  return reg_mentioned_p (reg, mem);

/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

ia64_ld_address_bypass_p (rtx producer, rtx consumer)
  rtx dest, src, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  src = ia64_single_set (consumer);
  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Note that LO_SUM is used for GOT loads.  */
  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)

  return reg_mentioned_p (reg, mem);

/* The following function returns TRUE if INSN produces an address for
   a load/store insn.  We will place such insns into an M slot because
   that decreases their latency.  */

ia64_produce_address_p (rtx insn)
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

emit_predicate_relation_info (void)
  FOR_EACH_BB_REVERSE (bb)
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
  FOR_EACH_BB_REVERSE (bb)
      rtx insn = BB_HEAD (bb);

      if (GET_CODE (insn) == CALL_INSN
	  && GET_CODE (PATTERN (insn)) == COND_EXEC
	  && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	  rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	  rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	  if (BB_HEAD (bb) == insn)
	  if (BB_END (bb) == insn)
	if (insn == BB_END (bb))
	insn = NEXT_INSN (insn);
/* Perform machine dependent operations on the rtl chain INSNS.  */

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have already split insns before scheduling.  */
    split_all_insns (0);

  /* ??? update_life_info_in_dirty_blocks fails to terminate during
     non-optimizing bootstrap.  */
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = xcalloc (1, clocks_length);
      if (ia64_tune == PROCESSOR_ITANIUM)
	  clocks = xcalloc (clocks_length, sizeof (int));
	  add_cycles = xcalloc (clocks_length, sizeof (int));
      if (ia64_tune == PROCESSOR_ITANIUM2)
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
      schedule_ebbs (dump_file);
      finish_bundle_states ();
      if (ia64_tune == PROCESSOR_ITANIUM)
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      /* Skip over insns that expand to nothing.  */
      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	  insn = prev_active_insn (insn);
      if (GET_CODE (insn) == CALL_INSN)
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  emit_insn (gen_break_f ());
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
/* Return true if REGNO is used by the epilogue.  */

ia64_epilogue_uses (int regno)
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	     TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */

      /* Likewise for ar.pfs, which is used by br.ret.  */

/* Return true if REGNO is used by the frame unwinder.  */

ia64_eh_uses (int regno)
  if (! reload_completed)

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

ia64_in_small_data_p (tree exp)
  if (TARGET_NO_SDATA)

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last block of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

process_epilogue (void)
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;

  fprintf (asm_out_file, "\t.restore sp\n");
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

process_set (FILE *asm_out_file, rtx pat)
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
      if (GET_CODE (src) == PLUS)
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	      if (INTVAL (op1) < 0)
		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
		process_epilogue ();
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  if (dest_regno != current_frame_info.reg_save_pr)
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));

	  /* Everything else should indicate being stored to memory.  */

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
      if (GET_CODE (XEXP (dest, 0)) == REG)
	  base = XEXP (dest, 0);
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));

      if (base == hard_frame_pointer_rtx)
	  saveop = ".savepsp";
      else if (base == stack_pointer_rtx)

      src_regno = REGNO (src);
	  if (current_frame_info.reg_save_b0 != 0)
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_pr != 0)
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_ar_lc != 0)
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);

	  if (current_frame_info.reg_save_ar_pfs != 0)
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);

	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));

	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));

	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

process_for_unwind_directive (FILE *asm_out_file, rtx insn)
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = false;

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
	pat = XEXP (pat, 0);
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	  process_set (asm_out_file, pat);
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
ia64_init_builtins (void)
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type_list (integer_type_node,
				psi_type_node, integer_type_node,
				integer_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type_list (long_integer_type_node,
				pdi_type_node, long_integer_type_node,
				long_integer_type_node, NULL_TREE);
  /* __sync_bool_compare_and_swap_di */
  tree si_ftype_pdi_di_di
    = build_function_type_list (integer_type_node,
				pdi_type_node, long_integer_type_node,
				long_integer_type_node, NULL_TREE);
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type_list (integer_type_node,
				psi_type_node, integer_type_node, NULL_TREE);

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type_list (long_integer_type_node,
				pdi_type_node, long_integer_type_node,

  /* __sync_lock_release_si */
    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);

  /* __sync_lock_release_di */
    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  /* ??? The back end should know to load/save __fpreg variables using
     the ldf.fill and stf.spill instructions.  */
  TYPE_PRECISION (fpreg_type) = 96;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 96;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
      tree float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, void_list_node),

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, void_list_node),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
			  tree arglist, rtx target)
  rtx ret, label, tmp, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
	insn = gen_fetchadd_acq_si (ret, mem, value);
	insn = gen_fetchadd_acq_di (ret, mem, value);

  tmp = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_move_insn (ret, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
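/* A minimal user-level sketch (deliberately compiled out), assuming
   code compiled for IA-64 with this backend; the wrapper name is
   hypothetical.  It shows the semantics the expander above
   implements.  */
#if 0
static int
example_fetch_and_add (int *counter, int value)
{
  /* Atomically performs *COUNTER += VALUE and returns the value held
     before the addition -- either via a single fetchadd4.acq (when
     VALUE is an immediate that instruction accepts) or via the
     mf/cmpxchg4.acq retry loop emitted above.  */
  return __sync_fetch_and_add_si (counter, value);
}
#endif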
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
			  tree arglist, rtx target)
  rtx old, label, tmp, ret, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_move_insn (old, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
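/* A minimal user-level sketch (deliberately compiled out), analogous
   to the one after ia64_expand_fetch_and_op above; the wrapper name is
   hypothetical.  */
#if 0
static int
example_add_and_fetch (int *counter, int value)
{
  /* Same retry loop, but the value after the atomic operation is
     returned: here, the contents of *COUNTER following the addition.  */
  return __sync_add_and_fetch_si (counter, value);
}
#endif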
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     cmpxchgsz.acq ret = [ptr], newval, ar.ccv

   For bool_ it's the same except return ret == oldval.
ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
			      int boolp, tree arglist, rtx target)
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (GET_MODE (old) != mode)
    old = convert_to_mode (mode, old, /*unsignedp=*/1);
  if (GET_MODE (new) != mode)
    new = convert_to_mode (mode, new, /*unsignedp=*/1);

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  convert_move (ccv, old, /*unsignedp=*/1);
  emit_insn (gen_mf ());
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);

      target = gen_reg_rtx (rmode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
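/* A minimal user-level sketch of both flavors (deliberately compiled
   out); the wrapper names are hypothetical.  */
#if 0
static int
example_val_cas (int *ptr, int oldval, int newval)
{
  /* Returns the contents of *PTR before the operation; NEWVAL is
     stored only if they equalled OLDVAL.  */
  return __sync_val_compare_and_swap_si (ptr, oldval, newval);
}

static int
example_bool_cas (int *ptr, int oldval, int newval)
{
  /* Returns nonzero iff the store happened, i.e. ret == oldval in
     the sequence above.  */
  return __sync_bool_compare_and_swap_si (ptr, oldval, newval);
}
#endif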
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = gen_reg_rtx (mode);

    insn = gen_xchgsi (ret, mem, new);
    insn = gen_xchgdi (ret, mem, new);
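/* A minimal user-level sketch of a spin lock built on the expansion
   above (deliberately compiled out); the wrapper name is hypothetical.
   IA-64 xchg has acquire semantics, so no extra fence is needed on
   entry to the critical section.  */
#if 0
static void
example_spin_lock (int *lock)
{
  /* Loop until the previous value was 0, i.e. we took the lock.  */
  while (__sync_lock_test_and_set_si (lock, 1) != 0)
    continue;
}
#endif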
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
ia64_expand_lock_release (enum machine_mode mode, tree arglist,
			  rtx target ATTRIBUTE_UNUSED)
  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);
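/* The matching release half of the spin-lock sketch above
   (deliberately compiled out); the wrapper name is hypothetical.  */
#if 0
static void
example_spin_unlock (int *lock)
{
  /* Expands to a plain release store of zero (st4.rel for SImode),
     as the comment above describes.  */
  __sync_lock_release_si (lock);
}
#endif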

rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
					   target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
					   target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
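
/* Illustrative note (added commentary, not from the original sources):
   taken together, the expanders above implement the __sync family for
   4- and 8-byte objects, so e.g.

	int v = __sync_fetch_and_add_si (&counter, 1);
	__sync_synchronize ();

   expand to the cmpxchg4.acq retry loop and a plain mf respectively.
   The _si/_di suffixes follow the builtin names registered earlier in
   this file for IA64_BUILTIN_FETCH_AND_ADD_SI and friends.  */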

/* For HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
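
/* Illustrative note (added commentary, not from the original sources):
   UNITS_PER_WORD is 8 on IA-64, so e.g. a 3-byte struct passed on the
   stack under HP-UX is padded upward, i.e. placed in the most
   significant bytes of its 8-byte slot, matching the big-endian HP-UX
   convention; scalars and word-sized or larger aggregates keep the
   default padding.  */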

/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;
  tree decl;
};

static GTY(()) struct extern_func_list *extern_func_head;

static void
ia64_hpux_add_extern_decl (tree decl)
{
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->decl = decl;
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      if (!id)
	abort ();

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
	{
	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

	  TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs (TYPE_ASM_OP, asm_out_file);
	  assemble_name (asm_out_file, name);
	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
	}
    }

  extern_func_head = 0;
}
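
/* Illustrative note (added commentary, not from the original sources):
   for a referenced but undefined function `foo', the loop above emits
   roughly

	.global foo
	.type foo,@function

   modulo the exact globalize_label hook and the TYPE_ASM_OP /
   TYPE_OPERAND_FMT spellings in effect, marking `foo' as a function
   symbol for the HP-UX tools.  */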

/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode; we need to keep it
   for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
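
/* Illustrative note (added commentary, not from the original sources):
   once this table is installed, a TFmode multiply (long double on
   HP-UX) such as

	long double c = a * b;

   compiles to a call to _U_Qfmpy rather than the default __multf3,
   and a double-to-TFmode widening goes through _U_Qfcnvff_dbl_to_quad
   rather than __extenddftf2.  */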

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
				 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that value should be passed in
   out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && strcmp (lang_hooks.name, "GNU C++") == 0);
}
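
/* Illustrative note (added commentary, not from the original sources):
   a C++ function such as

	struct S { S (const S &); ~S (); char buf[32]; };
	S f ();

   has a BLKmode, TREE_ADDRESSABLE return type, so under -fabi-version=2
   or later the caller passes the return-slot address in out0 and, for
   member functions, `this' moves to the second argument slot; that is
   the case ia64_output_mi_thunk below must account for.  */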

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
	  if (CONST_OK_FOR_I (vcall_offset))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
						  vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
	{
	  if (!CONST_OK_FOR_J (vcall_offset))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
			gen_rtx_MEM (ptr_mode, tmp));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
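
/* Illustrative note (added commentary, not from the original sources):
   the net effect of the thunk emitted above is

	thunk (this, ...):
	  this += DELTA;
	  if (VCALL_OFFSET)
	    this += *(*this + VCALL_OFFSET);
	  br f				// tail call; other args untouched

   with `this' in in0, or in in1 when the target function returns a
   class whose return-slot address occupies the first argument slot
   (see ia64_struct_retval_addr_is_first_parm_p above).  */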

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
		       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

#include "gt-ia64.h"