1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "sched-int.h"
48 #include "target-def.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
54 /* This is used for communication between ASM_OUTPUT_LABEL and
55 ASM_OUTPUT_LABELREF. */
56 int ia64_asm_output_label = 0;
58 /* Define the information needed to generate branch and scc insns. This is
59 stored from the compare operation. */
60 struct rtx_def * ia64_compare_op0;
61 struct rtx_def * ia64_compare_op1;
63 /* Register names for ia64_expand_prologue. */
64 static const char * const ia64_reg_numbers[96] =
65 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
66 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
67 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
68 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
69 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
70 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
71 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
72 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
73 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
74 "r104","r105","r106","r107","r108","r109","r110","r111",
75 "r112","r113","r114","r115","r116","r117","r118","r119",
76 "r120","r121","r122","r123","r124","r125","r126","r127"};
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_input_reg_names[8] =
80 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
82 /* ??? These strings could be shared with REGISTER_NAMES. */
83 static const char * const ia64_local_reg_names[80] =
84 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
85 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
86 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
87 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
88 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
89 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
90 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
91 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
92 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
93 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
95 /* ??? These strings could be shared with REGISTER_NAMES. */
96 static const char * const ia64_output_reg_names[8] =
97 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
99 /* String used with the -mfixed-range= option. */
100 const char *ia64_fixed_range_string;
102 /* Determines whether we use adds, addl, or movl to generate our
103 TLS immediate offsets. */
104 int ia64_tls_size = 22;
106 /* String used with the -mtls-size= option. */
107 const char *ia64_tls_size_string;
109 /* Which cpu are we scheduling for. */
110 enum processor_type ia64_tune;
112 /* String used with the -tune= option. */
113 const char *ia64_tune_string;
115 /* Determines whether we run our final scheduling pass or not. We always
116 avoid the normal second scheduling pass. */
117 static int ia64_flag_schedule_insns2;
119 /* Variables which are this size or smaller are put in the sdata/sbss
122 unsigned int ia64_section_threshold;
124 /* The following variable is used by the DFA insn scheduler. The value is
125 TRUE if we do insn bundling instead of insn scheduling. */
128 /* Structure to be filled in by ia64_compute_frame_size with register
129 save masks and offsets for the current function. */
131 struct ia64_frame_info
133 HOST_WIDE_INT total_size; /* size of the stack frame, not including
134 the caller's scratch area. */
135 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
136 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
137 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
138 HARD_REG_SET mask; /* mask of saved registers. */
139 unsigned int gr_used_mask; /* mask of registers in use as gr spill
140 registers or long-term scratches. */
141 int n_spilled; /* number of spilled registers. */
142 int reg_fp; /* register for fp. */
143 int reg_save_b0; /* save register for b0. */
144 int reg_save_pr; /* save register for prs. */
145 int reg_save_ar_pfs; /* save register for ar.pfs. */
146 int reg_save_ar_unat; /* save register for ar.unat. */
147 int reg_save_ar_lc; /* save register for ar.lc. */
148 int reg_save_gp; /* save register for gp. */
149 int n_input_regs; /* number of input registers used. */
150 int n_local_regs; /* number of local registers used. */
151 int n_output_regs; /* number of output registers used. */
152 int n_rotate_regs; /* number of rotating registers used. */
154 char need_regstk; /* true if a .regstk directive needed. */
155 char initialized; /* true if the data is finalized. */
158 /* Current frame information calculated by ia64_compute_frame_size. */
159 static struct ia64_frame_info current_frame_info;
161 static int ia64_use_dfa_pipeline_interface PARAMS ((void));
162 static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
163 static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
164 static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
165 static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
166 static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
167 static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
168 static rtx gen_tls_get_addr PARAMS ((void));
169 static rtx gen_thread_pointer PARAMS ((void));
170 static rtx ia64_expand_tls_address PARAMS ((enum tls_model, rtx, rtx));
171 static int find_gr_spill PARAMS ((int));
172 static int next_scratch_gr_reg PARAMS ((void));
173 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
174 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
175 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
176 static void finish_spill_pointers PARAMS ((void));
177 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
178 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
179 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
180 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
181 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
182 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
184 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
185 static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
186 static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
187 static void fix_range PARAMS ((const char *));
188 static struct machine_function * ia64_init_machine_status PARAMS ((void));
189 static void emit_insn_group_barriers PARAMS ((FILE *));
190 static void emit_all_insn_group_barriers PARAMS ((FILE *));
191 static void final_emit_insn_group_barriers PARAMS ((FILE *));
192 static void emit_predicate_relation_info PARAMS ((void));
193 static void ia64_reorg PARAMS ((void));
194 static bool ia64_in_small_data_p PARAMS ((tree));
195 static void process_epilogue PARAMS ((void));
196 static int process_set PARAMS ((FILE *, rtx));
198 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
200 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
202 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
205 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
207 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
208 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
209 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
210 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
211 static void ia64_output_function_end_prologue PARAMS ((FILE *));
213 static int ia64_issue_rate PARAMS ((void));
214 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
215 static void ia64_sched_init PARAMS ((FILE *, int, int));
216 static void ia64_sched_finish PARAMS ((FILE *, int));
217 static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
219 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
220 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
221 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
223 static struct bundle_state *get_free_bundle_state PARAMS ((void));
224 static void free_bundle_state PARAMS ((struct bundle_state *));
225 static void initiate_bundle_states PARAMS ((void));
226 static void finish_bundle_states PARAMS ((void));
227 static unsigned bundle_state_hash PARAMS ((const void *));
228 static int bundle_state_eq_p PARAMS ((const void *, const void *));
229 static int insert_bundle_state PARAMS ((struct bundle_state *));
230 static void initiate_bundle_state_table PARAMS ((void));
231 static void finish_bundle_state_table PARAMS ((void));
232 static int try_issue_nops PARAMS ((struct bundle_state *, int));
233 static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
234 static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
236 static int get_max_pos PARAMS ((state_t));
237 static int get_template PARAMS ((state_t, int));
239 static rtx get_next_important_insn PARAMS ((rtx, rtx));
240 static void bundling PARAMS ((FILE *, int, rtx, rtx));
242 static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
243 HOST_WIDE_INT, tree));
244 static void ia64_file_start PARAMS ((void));
246 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
247 unsigned HOST_WIDE_INT));
248 static void ia64_rwreloc_select_section PARAMS ((tree, int,
249 unsigned HOST_WIDE_INT))
251 static void ia64_rwreloc_unique_section PARAMS ((tree, int))
253 static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
254 unsigned HOST_WIDE_INT))
256 static unsigned int ia64_rwreloc_section_type_flags
257 PARAMS ((tree, const char *, int))
260 static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
262 static void ia64_hpux_file_end PARAMS ((void))
265 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
266 static void ia64_encode_section_info (tree, rtx, int);
269 /* Table of valid machine attributes. */
270 static const struct attribute_spec ia64_attribute_table[] =
272 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
273 { "syscall_linkage", 0, 0, false, true, true, NULL },
274 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
275 { NULL, 0, 0, false, false, false, NULL }
278 /* Initialize the GCC target structure. */
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
282 #undef TARGET_INIT_BUILTINS
283 #define TARGET_INIT_BUILTINS ia64_init_builtins
285 #undef TARGET_EXPAND_BUILTIN
286 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
288 #undef TARGET_ASM_BYTE_OP
289 #define TARGET_ASM_BYTE_OP "\tdata1\t"
290 #undef TARGET_ASM_ALIGNED_HI_OP
291 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
292 #undef TARGET_ASM_ALIGNED_SI_OP
293 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
294 #undef TARGET_ASM_ALIGNED_DI_OP
295 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
296 #undef TARGET_ASM_UNALIGNED_HI_OP
297 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
298 #undef TARGET_ASM_UNALIGNED_SI_OP
299 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
300 #undef TARGET_ASM_UNALIGNED_DI_OP
301 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
302 #undef TARGET_ASM_INTEGER
303 #define TARGET_ASM_INTEGER ia64_assemble_integer
305 #undef TARGET_ASM_FUNCTION_PROLOGUE
306 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
307 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
308 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
309 #undef TARGET_ASM_FUNCTION_EPILOGUE
310 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
312 #undef TARGET_IN_SMALL_DATA_P
313 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
315 #undef TARGET_SCHED_ADJUST_COST
316 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
319 #undef TARGET_SCHED_VARIABLE_ISSUE
320 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
321 #undef TARGET_SCHED_INIT
322 #define TARGET_SCHED_INIT ia64_sched_init
323 #undef TARGET_SCHED_FINISH
324 #define TARGET_SCHED_FINISH ia64_sched_finish
325 #undef TARGET_SCHED_REORDER
326 #define TARGET_SCHED_REORDER ia64_sched_reorder
327 #undef TARGET_SCHED_REORDER2
328 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
330 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
331 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
333 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
334 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
336 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
337 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
339 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
340 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
341 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
342 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
344 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
345 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
346 ia64_first_cycle_multipass_dfa_lookahead_guard
348 #undef TARGET_SCHED_DFA_NEW_CYCLE
349 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
352 #undef TARGET_HAVE_TLS
353 #define TARGET_HAVE_TLS true
356 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
357 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
359 #undef TARGET_ASM_OUTPUT_MI_THUNK
360 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
361 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
362 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START ia64_file_start
367 #undef TARGET_RTX_COSTS
368 #define TARGET_RTX_COSTS ia64_rtx_costs
369 #undef TARGET_ADDRESS_COST
370 #define TARGET_ADDRESS_COST hook_int_rtx_0
372 #undef TARGET_MACHINE_DEPENDENT_REORG
373 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
375 #undef TARGET_ENCODE_SECTION_INFO
376 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
378 struct gcc_target targetm = TARGET_INITIALIZER;
380 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
383 call_operand (op, mode)
385 enum machine_mode mode;
387 if (mode != GET_MODE (op) && mode != VOIDmode)
390 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
391 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
394 /* Return 1 if OP refers to a symbol in the sdata section. */
397 sdata_symbolic_operand (op, mode)
399 enum machine_mode mode ATTRIBUTE_UNUSED;
401 switch (GET_CODE (op))
404 if (GET_CODE (XEXP (op, 0)) != PLUS
405 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
407 op = XEXP (XEXP (op, 0), 0);
411 if (CONSTANT_POOL_ADDRESS_P (op))
412 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
414 return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);
424 small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
426 return SYMBOL_REF_SMALL_ADDR_P (op);
429 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
432 got_symbolic_operand (op, mode)
434 enum machine_mode mode ATTRIBUTE_UNUSED;
436 switch (GET_CODE (op))
440 if (GET_CODE (op) != PLUS)
442 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
445 if (GET_CODE (op) != CONST_INT)
450 /* Ok if we're not using GOT entries at all. */
451 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
454 /* "Ok" while emitting rtl, since otherwise we won't be provided
455 with the entire offset during emission, which makes it very
456 hard to split the offset into high and low parts. */
457 if (rtx_equal_function_value_matters)
460 /* Force the low 14 bits of the constant to zero so that we do not
461 use up so many GOT entries. */
462 return (INTVAL (op) & 0x3fff) == 0;
465 if (SYMBOL_REF_SMALL_ADDR_P (op))
476 /* Return 1 if OP refers to a symbol. */
479 symbolic_operand (op, mode)
481 enum machine_mode mode ATTRIBUTE_UNUSED;
483 switch (GET_CODE (op))
496 /* Return tls_model if OP refers to a TLS symbol. */
499 tls_symbolic_operand (op, mode)
501 enum machine_mode mode ATTRIBUTE_UNUSED;
503 if (GET_CODE (op) != SYMBOL_REF)
505 return SYMBOL_REF_TLS_MODEL (op);
509 /* Return 1 if OP refers to a function. */
512 function_operand (op, mode)
514 enum machine_mode mode ATTRIBUTE_UNUSED;
516 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
522 /* Return 1 if OP is setjmp or a similar function. */
524 /* ??? This is an unsatisfying solution. Should rethink. */
527 setjmp_operand (op, mode)
529 enum machine_mode mode ATTRIBUTE_UNUSED;
534 if (GET_CODE (op) != SYMBOL_REF)
539 /* The following code is borrowed from special_function_p in calls.c. */
541 /* Disregard prefix _, __ or __x. */
544 if (name[1] == '_' && name[2] == 'x')
546 else if (name[1] == '_')
556 && (! strcmp (name, "setjmp")
557 || ! strcmp (name, "setjmp_syscall")))
559 && ! strcmp (name, "sigsetjmp"))
561 && ! strcmp (name, "savectx")));
563 else if ((name[0] == 'q' && name[1] == 's'
564 && ! strcmp (name, "qsetjmp"))
565 || (name[0] == 'v' && name[1] == 'f'
566 && ! strcmp (name, "vfork")))
572 /* Return 1 if OP is a general operand, excluding tls symbolic operands. */
575 move_operand (op, mode)
577 enum machine_mode mode;
579 return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
582 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
585 gr_register_operand (op, mode)
587 enum machine_mode mode;
589 if (! register_operand (op, mode))
591 if (GET_CODE (op) == SUBREG)
592 op = SUBREG_REG (op);
593 if (GET_CODE (op) == REG)
595 unsigned int regno = REGNO (op);
596 if (regno < FIRST_PSEUDO_REGISTER)
597 return GENERAL_REGNO_P (regno);
602 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
605 fr_register_operand (op, mode)
607 enum machine_mode mode;
609 if (! register_operand (op, mode))
611 if (GET_CODE (op) == SUBREG)
612 op = SUBREG_REG (op);
613 if (GET_CODE (op) == REG)
615 unsigned int regno = REGNO (op);
616 if (regno < FIRST_PSEUDO_REGISTER)
617 return FR_REGNO_P (regno);
622 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
625 grfr_register_operand (op, mode)
627 enum machine_mode mode;
629 if (! register_operand (op, mode))
631 if (GET_CODE (op) == SUBREG)
632 op = SUBREG_REG (op);
633 if (GET_CODE (op) == REG)
635 unsigned int regno = REGNO (op);
636 if (regno < FIRST_PSEUDO_REGISTER)
637 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
642 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
645 gr_nonimmediate_operand (op, mode)
647 enum machine_mode mode;
649 if (! nonimmediate_operand (op, mode))
651 if (GET_CODE (op) == SUBREG)
652 op = SUBREG_REG (op);
653 if (GET_CODE (op) == REG)
655 unsigned int regno = REGNO (op);
656 if (regno < FIRST_PSEUDO_REGISTER)
657 return GENERAL_REGNO_P (regno);
662 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
665 fr_nonimmediate_operand (op, mode)
667 enum machine_mode mode;
669 if (! nonimmediate_operand (op, mode))
671 if (GET_CODE (op) == SUBREG)
672 op = SUBREG_REG (op);
673 if (GET_CODE (op) == REG)
675 unsigned int regno = REGNO (op);
676 if (regno < FIRST_PSEUDO_REGISTER)
677 return FR_REGNO_P (regno);
682 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
685 grfr_nonimmediate_operand (op, mode)
687 enum machine_mode mode;
689 if (! nonimmediate_operand (op, mode))
691 if (GET_CODE (op) == SUBREG)
692 op = SUBREG_REG (op);
693 if (GET_CODE (op) == REG)
695 unsigned int regno = REGNO (op);
696 if (regno < FIRST_PSEUDO_REGISTER)
697 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
702 /* Return 1 if OP is a GR register operand, or zero. */
705 gr_reg_or_0_operand (op, mode)
707 enum machine_mode mode;
709 return (op == const0_rtx || gr_register_operand (op, mode));
712 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
715 gr_reg_or_5bit_operand (op, mode)
717 enum machine_mode mode;
719 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
720 || GET_CODE (op) == CONSTANT_P_RTX
721 || gr_register_operand (op, mode));
724 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
727 gr_reg_or_6bit_operand (op, mode)
729 enum machine_mode mode;
731 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
732 || GET_CODE (op) == CONSTANT_P_RTX
733 || gr_register_operand (op, mode));
736 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
739 gr_reg_or_8bit_operand (op, mode)
741 enum machine_mode mode;
743 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
744 || GET_CODE (op) == CONSTANT_P_RTX
745 || gr_register_operand (op, mode));
748 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
751 grfr_reg_or_8bit_operand (op, mode)
753 enum machine_mode mode;
755 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
756 || GET_CODE (op) == CONSTANT_P_RTX
757 || grfr_register_operand (op, mode));
760 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
764 gr_reg_or_8bit_adjusted_operand (op, mode)
766 enum machine_mode mode;
768 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
769 || GET_CODE (op) == CONSTANT_P_RTX
770 || gr_register_operand (op, mode));
773 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
774 immediate and an 8 bit adjusted immediate operand. This is necessary
775 because when we emit a compare, we don't know what the condition will be,
776 so we need the union of the immediates accepted by GT and LT. */
779 gr_reg_or_8bit_and_adjusted_operand (op, mode)
781 enum machine_mode mode;
783 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
784 && CONST_OK_FOR_L (INTVAL (op)))
785 || GET_CODE (op) == CONSTANT_P_RTX
786 || gr_register_operand (op, mode));
789 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
792 gr_reg_or_14bit_operand (op, mode)
794 enum machine_mode mode;
796 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
797 || GET_CODE (op) == CONSTANT_P_RTX
798 || gr_register_operand (op, mode));
801 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
804 gr_reg_or_22bit_operand (op, mode)
806 enum machine_mode mode;
808 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
809 || GET_CODE (op) == CONSTANT_P_RTX
810 || gr_register_operand (op, mode));
813 /* Return 1 if OP is a 6 bit immediate operand. */
816 shift_count_operand (op, mode)
818 enum machine_mode mode ATTRIBUTE_UNUSED;
820 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
821 || GET_CODE (op) == CONSTANT_P_RTX);
824 /* Return 1 if OP is a 5 bit immediate operand. */
827 shift_32bit_count_operand (op, mode)
829 enum machine_mode mode ATTRIBUTE_UNUSED;
831 return ((GET_CODE (op) == CONST_INT
832 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
833 || GET_CODE (op) == CONSTANT_P_RTX);
836 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
839 shladd_operand (op, mode)
841 enum machine_mode mode ATTRIBUTE_UNUSED;
843 return (GET_CODE (op) == CONST_INT
844 && (INTVAL (op) == 2 || INTVAL (op) == 4
845 || INTVAL (op) == 8 || INTVAL (op) == 16));
848 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
851 fetchadd_operand (op, mode)
853 enum machine_mode mode ATTRIBUTE_UNUSED;
855 return (GET_CODE (op) == CONST_INT
856 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
857 INTVAL (op) == -4 || INTVAL (op) == -1 ||
858 INTVAL (op) == 1 || INTVAL (op) == 4 ||
859 INTVAL (op) == 8 || INTVAL (op) == 16));
862 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
865 fr_reg_or_fp01_operand (op, mode)
867 enum machine_mode mode;
869 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
870 || fr_register_operand (op, mode));
873 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
874 POST_MODIFY with a REG as displacement. */
877 destination_operand (op, mode)
879 enum machine_mode mode;
881 if (! nonimmediate_operand (op, mode))
883 if (GET_CODE (op) == MEM
884 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
885 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
890 /* Like memory_operand, but don't allow post-increments. */
893 not_postinc_memory_operand (op, mode)
895 enum machine_mode mode;
897 return (memory_operand (op, mode)
898 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
901 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
902 signed immediate operand. */
905 normal_comparison_operator (op, mode)
907 enum machine_mode mode;
909 enum rtx_code code = GET_CODE (op);
910 return ((mode == VOIDmode || GET_MODE (op) == mode)
911 && (code == EQ || code == NE
912 || code == GT || code == LE || code == GTU || code == LEU));
915 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
916 signed immediate operand. */
919 adjusted_comparison_operator (op, mode)
921 enum machine_mode mode;
923 enum rtx_code code = GET_CODE (op);
924 return ((mode == VOIDmode || GET_MODE (op) == mode)
925 && (code == LT || code == GE || code == LTU || code == GEU));
928 /* Return 1 if this is a signed inequality operator. */
931 signed_inequality_operator (op, mode)
933 enum machine_mode mode;
935 enum rtx_code code = GET_CODE (op);
936 return ((mode == VOIDmode || GET_MODE (op) == mode)
937 && (code == GE || code == GT
938 || code == LE || code == LT));
941 /* Return 1 if this operator is valid for predication. */
944 predicate_operator (op, mode)
946 enum machine_mode mode;
948 enum rtx_code code = GET_CODE (op);
949 return ((GET_MODE (op) == mode || mode == VOIDmode)
950 && (code == EQ || code == NE));
953 /* Return 1 if this operator can be used in a conditional operation. */
956 condop_operator (op, mode)
958 enum machine_mode mode;
960 enum rtx_code code = GET_CODE (op);
961 return ((GET_MODE (op) == mode || mode == VOIDmode)
962 && (code == PLUS || code == MINUS || code == AND
963 || code == IOR || code == XOR));
966 /* Return 1 if this is the ar.lc register. */
969 ar_lc_reg_operand (op, mode)
971 enum machine_mode mode;
973 return (GET_MODE (op) == DImode
974 && (mode == DImode || mode == VOIDmode)
975 && GET_CODE (op) == REG
976 && REGNO (op) == AR_LC_REGNUM);
979 /* Return 1 if this is the ar.ccv register. */
982 ar_ccv_reg_operand (op, mode)
984 enum machine_mode mode;
986 return ((GET_MODE (op) == mode || mode == VOIDmode)
987 && GET_CODE (op) == REG
988 && REGNO (op) == AR_CCV_REGNUM);
991 /* Return 1 if this is the ar.pfs register. */
994 ar_pfs_reg_operand (op, mode)
996 enum machine_mode mode;
998 return ((GET_MODE (op) == mode || mode == VOIDmode)
999 && GET_CODE (op) == REG
1000 && REGNO (op) == AR_PFS_REGNUM);
1003 /* Like general_operand, but don't allow (mem (addressof)). */
1006 general_tfmode_operand (op, mode)
1008 enum machine_mode mode;
1010 if (! general_operand (op, mode))
1012 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
1020 destination_tfmode_operand (op, mode)
1022 enum machine_mode mode;
1024 if (! destination_operand (op, mode))
1026 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
1034 tfreg_or_fp01_operand (op, mode)
1036 enum machine_mode mode;
1038 if (GET_CODE (op) == SUBREG)
1040 return fr_reg_or_fp01_operand (op, mode);
1043 /* Return 1 if OP is valid as a base register in a reg + offset address. */
1046 basereg_operand (op, mode)
1048 enum machine_mode mode;
1050 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
1051 checks from pa.c basereg_operand as well? Seems to be OK without them
1054 return (register_operand (op, mode) &&
1055 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
1060 ADDR_AREA_NORMAL, /* normal address area */
1061 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
1065 static GTY(()) tree small_ident1;
1066 static GTY(()) tree small_ident2;
1071 if (small_ident1 == 0)
1073 small_ident1 = get_identifier ("small");
1074 small_ident2 = get_identifier ("__small__");
1078 /* Retrieve the address area that has been chosen for the given decl. */
1080 static ia64_addr_area
1081 ia64_get_addr_area (tree decl)
1085 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
1091 id = TREE_VALUE (TREE_VALUE (model_attr));
1092 if (id == small_ident1 || id == small_ident2)
1093 return ADDR_AREA_SMALL;
1095 return ADDR_AREA_NORMAL;
1099 ia64_handle_model_attribute (tree *node, tree name,
1101 int flags ATTRIBUTE_UNUSED,
1104 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
1105 ia64_addr_area area;
1106 tree arg, decl = *node;
1109 arg = TREE_VALUE (args);
1110 if (arg == small_ident1 || arg == small_ident2)
1112 addr_area = ADDR_AREA_SMALL;
1116 warning ("invalid argument of `%s' attribute",
1117 IDENTIFIER_POINTER (name));
1118 *no_add_attrs = true;
1121 switch (TREE_CODE (decl))
1124 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
1126 && !TREE_STATIC (decl))
1128 error ("%Ha an address area attribute cannot be specified for "
1129 "local variables", &DECL_SOURCE_LOCATION (decl), decl);
1130 *no_add_attrs = true;
1132 area = ia64_get_addr_area (decl);
1133 if (area != ADDR_AREA_NORMAL && addr_area != area)
1135 error ("%Ha address area of '%s' conflicts with previous "
1136 "declaration", &DECL_SOURCE_LOCATION (decl), decl);
1137 *no_add_attrs = true;
1142 error ("%Ha address area attribute cannot be specified for functions",
1143 &DECL_SOURCE_LOCATION (decl), decl);
1144 *no_add_attrs = true;
1148 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1149 *no_add_attrs = true;
1157 ia64_encode_addr_area (tree decl, rtx symbol)
1161 flags = SYMBOL_REF_FLAGS (symbol);
1162 switch (ia64_get_addr_area (decl))
1164 case ADDR_AREA_NORMAL: break;
1165 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
1168 SYMBOL_REF_FLAGS (symbol) = flags;
1172 ia64_encode_section_info (tree decl, rtx rtl, int first)
1174 default_encode_section_info (decl, rtl, first);
1176 if (TREE_CODE (decl) == VAR_DECL
1177 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
1178 ia64_encode_addr_area (decl, XEXP (rtl, 0));
1181 /* Return 1 if the operands of a move are ok. */
1184 ia64_move_ok (dst, src)
1187 /* If we're under init_recog_no_volatile, we'll not be able to use
1188 memory_operand. So check the code directly and don't worry about
1189 the validity of the underlying address, which should have been
1190 checked elsewhere anyway. */
1191 if (GET_CODE (dst) != MEM)
1193 if (GET_CODE (src) == MEM)
1195 if (register_operand (src, VOIDmode))
1198 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
1199 if (INTEGRAL_MODE_P (GET_MODE (dst)))
1200 return src == const0_rtx;
1202 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1205 /* Return 0 if we are doing C++ code. This optimization fails with
1206 C++ because of GNAT c++/6685. */
1209 addp4_optimize_ok (op1, op2)
1213 if (!strcmp (lang_hooks.name, "GNU C++"))
1216 return (basereg_operand (op1, GET_MODE(op1)) !=
1217 basereg_operand (op2, GET_MODE(op2)));
1220 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1221 Return the length of the field, or <= 0 on failure. */
1224 ia64_depz_field_mask (rop, rshift)
1227 unsigned HOST_WIDE_INT op = INTVAL (rop);
1228 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1230 /* Get rid of the zero bits we're shifting in. */
1233 /* We must now have a solid block of 1's at bit 0. */
1234 return exact_log2 (op + 1);
1237 /* Expand a symbolic constant load. */
1240 ia64_expand_load_address (dest, src)
1243 if (tls_symbolic_operand (src, VOIDmode))
1245 if (GET_CODE (dest) != REG)
1248 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1249 having to pointer-extend the value afterward. Other forms of address
1250 computation below are also more natural to compute as 64-bit quantities.
1251 If we've been given an SImode destination register, change it. */
1252 if (GET_MODE (dest) != Pmode)
1253 dest = gen_rtx_REG (Pmode, REGNO (dest));
1255 if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
1257 emit_insn (gen_rtx_SET (VOIDmode, dest, src));
1260 else if (TARGET_AUTO_PIC)
1262 emit_insn (gen_load_gprel64 (dest, src));
1265 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1267 emit_insn (gen_load_fptr (dest, src));
1270 else if (sdata_symbolic_operand (src, VOIDmode))
1272 emit_insn (gen_load_gprel (dest, src));
1276 if (GET_CODE (src) == CONST
1277 && GET_CODE (XEXP (src, 0)) == PLUS
1278 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1279 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1281 rtx sym = XEXP (XEXP (src, 0), 0);
1282 HOST_WIDE_INT ofs, hi, lo;
1284 /* Split the offset into a sign extended 14-bit low part
1285 and a complementary high part. */
1286 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1287 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1290 ia64_expand_load_address (dest, plus_constant (sym, hi));
1291 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
1297 tmp = gen_rtx_HIGH (Pmode, src);
1298 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1299 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1301 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
1302 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1306 static GTY(()) rtx gen_tls_tga;
1311 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1315 static GTY(()) rtx thread_pointer_rtx;
1317 gen_thread_pointer ()
1319 if (!thread_pointer_rtx)
1321 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1322 RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1324 return thread_pointer_rtx;
1328 ia64_expand_tls_address (tls_kind, op0, op1)
1329 enum tls_model tls_kind;
1332 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1336 case TLS_MODEL_GLOBAL_DYNAMIC:
1339 tga_op1 = gen_reg_rtx (Pmode);
1340 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1341 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1342 RTX_UNCHANGING_P (tga_op1) = 1;
1344 tga_op2 = gen_reg_rtx (Pmode);
1345 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1346 tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1347 RTX_UNCHANGING_P (tga_op2) = 1;
1349 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1350 LCT_CONST, Pmode, 2, tga_op1,
1351 Pmode, tga_op2, Pmode);
1353 insns = get_insns ();
1356 emit_libcall_block (insns, op0, tga_ret, op1);
1359 case TLS_MODEL_LOCAL_DYNAMIC:
1360 /* ??? This isn't the completely proper way to do local-dynamic
1361 If the call to __tls_get_addr is used only by a single symbol,
1362 then we should (somehow) move the dtprel to the second arg
1363 to avoid the extra add. */
1366 tga_op1 = gen_reg_rtx (Pmode);
1367 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1368 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1369 RTX_UNCHANGING_P (tga_op1) = 1;
1371 tga_op2 = const0_rtx;
1373 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1374 LCT_CONST, Pmode, 2, tga_op1,
1375 Pmode, tga_op2, Pmode);
1377 insns = get_insns ();
1380 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1382 tmp = gen_reg_rtx (Pmode);
1383 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1385 if (register_operand (op0, Pmode))
1388 tga_ret = gen_reg_rtx (Pmode);
1391 emit_insn (gen_load_dtprel (tga_ret, op1));
1392 emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1395 emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1397 return (tga_ret == op0 ? NULL_RTX : tga_ret);
1399 case TLS_MODEL_INITIAL_EXEC:
1400 tmp = gen_reg_rtx (Pmode);
1401 emit_insn (gen_load_ltoff_tprel (tmp, op1));
1402 tmp = gen_rtx_MEM (Pmode, tmp);
1403 RTX_UNCHANGING_P (tmp) = 1;
1404 tmp = force_reg (Pmode, tmp);
1406 if (register_operand (op0, Pmode))
1409 op1 = gen_reg_rtx (Pmode);
1410 emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1412 return (op1 == op0 ? NULL_RTX : op1);
1414 case TLS_MODEL_LOCAL_EXEC:
1415 if (register_operand (op0, Pmode))
1418 tmp = gen_reg_rtx (Pmode);
1421 emit_insn (gen_load_tprel (tmp, op1));
1422 emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1425 emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1427 return (tmp == op0 ? NULL_RTX : tmp);
1435 ia64_expand_move (op0, op1)
1438 enum machine_mode mode = GET_MODE (op0);
1440 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1441 op1 = force_reg (mode, op1);
1443 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1445 enum tls_model tls_kind;
1446 if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1447 return ia64_expand_tls_address (tls_kind, op0, op1);
1449 if (!TARGET_NO_PIC && reload_completed)
1451 ia64_expand_load_address (op0, op1);
1459 /* Split a move from OP1 to OP0 conditional on COND. */
1462 ia64_emit_cond_move (op0, op1, cond)
1465 rtx insn, first = get_last_insn ();
1467 emit_move_insn (op0, op1);
1469 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1471 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1475 /* Split a post-reload TImode reference into two DImode components. */
1478 ia64_split_timode (out, in, scratch)
1482 switch (GET_CODE (in))
1485 out[0] = gen_rtx_REG (DImode, REGNO (in));
1486 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1491 rtx base = XEXP (in, 0);
1493 switch (GET_CODE (base))
1496 out[0] = adjust_address (in, DImode, 0);
1499 base = XEXP (base, 0);
1500 out[0] = adjust_address (in, DImode, 0);
1503 /* Since we're changing the mode, we need to change to POST_MODIFY
1504 as well to preserve the size of the increment. Either that or
1505 do the update in two steps, but we've already got this scratch
1506 register handy so let's use it. */
1508 base = XEXP (base, 0);
1510 = change_address (in, DImode,
1512 (Pmode, base, plus_constant (base, 16)));
1515 base = XEXP (base, 0);
1517 = change_address (in, DImode,
1519 (Pmode, base, plus_constant (base, -16)));
1525 if (scratch == NULL_RTX)
1527 out[1] = change_address (in, DImode, scratch);
1528 return gen_adddi3 (scratch, base, GEN_INT (8));
1533 split_double (in, &out[0], &out[1]);
1541 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1542 through memory plus an extra GR scratch register. Except that you can
1543 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1544 SECONDARY_RELOAD_CLASS, but not both.
1546 We got into problems in the first place by allowing a construct like
1547 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1548 This solution attempts to prevent this situation from occurring. When
1549 we see something like the above, we spill the inner register to memory. */
1552 spill_tfmode_operand (in, force)
1556 if (GET_CODE (in) == SUBREG
1557 && GET_MODE (SUBREG_REG (in)) == TImode
1558 && GET_CODE (SUBREG_REG (in)) == REG)
1560 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
1561 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1563 else if (force && GET_CODE (in) == REG)
1565 rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
1566 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1568 else if (GET_CODE (in) == MEM
1569 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1570 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1575 /* Emit comparison instruction if necessary, returning the expression
1576 that holds the compare result in the proper mode. */
1579 ia64_expand_compare (code, mode)
1581 enum machine_mode mode;
1583 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1586 /* If we have a BImode input, then we already have a compare result, and
1587 do not need to emit another comparison. */
1588 if (GET_MODE (op0) == BImode)
1590 if ((code == NE || code == EQ) && op1 == const0_rtx)
1597 cmp = gen_reg_rtx (BImode);
1598 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1599 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1603 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1606 /* Emit the appropriate sequence for a call. */
1609 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1612 rtx nextarg ATTRIBUTE_UNUSED;
1617 addr = XEXP (addr, 0);
1618 b0 = gen_rtx_REG (DImode, R_BR (0));
1620 /* ??? Should do this for functions known to bind local too. */
1621 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1624 insn = gen_sibcall_nogp (addr);
1626 insn = gen_call_nogp (addr, b0);
1628 insn = gen_call_value_nogp (retval, addr, b0);
1629 insn = emit_call_insn (insn);
1634 insn = gen_sibcall_gp (addr);
1636 insn = gen_call_gp (addr, b0);
1638 insn = gen_call_value_gp (retval, addr, b0);
1639 insn = emit_call_insn (insn);
1641 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1645 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1653 if (current_frame_info.reg_save_gp)
1654 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1657 HOST_WIDE_INT offset;
1659 offset = (current_frame_info.spill_cfa_off
1660 + current_frame_info.spill_size);
1661 if (frame_pointer_needed)
1663 tmp = hard_frame_pointer_rtx;
1668 tmp = stack_pointer_rtx;
1669 offset = current_frame_info.total_size - offset;
1672 if (CONST_OK_FOR_I (offset))
1673 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1674 tmp, GEN_INT (offset)));
1677 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1678 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1679 pic_offset_table_rtx, tmp));
1682 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1685 emit_move_insn (pic_offset_table_rtx, tmp);
1689 ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1690 noreturn_p, sibcall_p)
1691 rtx retval, addr, retaddr, scratch_r, scratch_b;
1692 int noreturn_p, sibcall_p;
1695 bool is_desc = false;
1697 /* If we find we're calling through a register, then we're actually
1698 calling through a descriptor, so load up the values. */
1699 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1704 /* ??? We are currently constrained to *not* use peep2, because
1705 we can legitimately change the global lifetime of the GP
1706 (in the form of killing where previously live). This is
1707 because a call through a descriptor doesn't use the previous
1708 value of the GP, while a direct call does, and we do not
1709 commit to either form until the split here.
1711 That said, this means that we lack precise life info for
1712 whether ADDR is dead after this call. This is not terribly
1713 important, since we can fix things up essentially for free
1714 with the POST_DEC below, but it's nice to not use it when we
1715 can immediately tell it's not necessary. */
1716 addr_dead_p = ((noreturn_p || sibcall_p
1717 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1719 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1721 /* Load the code address into scratch_b. */
1722 tmp = gen_rtx_POST_INC (Pmode, addr);
1723 tmp = gen_rtx_MEM (Pmode, tmp);
1724 emit_move_insn (scratch_r, tmp);
1725 emit_move_insn (scratch_b, scratch_r);
1727 /* Load the GP address. If ADDR is not dead here, then we must
1728 revert the change made above via the POST_INCREMENT. */
1730 tmp = gen_rtx_POST_DEC (Pmode, addr);
1733 tmp = gen_rtx_MEM (Pmode, tmp);
1734 emit_move_insn (pic_offset_table_rtx, tmp);
1741 insn = gen_sibcall_nogp (addr);
1743 insn = gen_call_value_nogp (retval, addr, retaddr);
1745 insn = gen_call_nogp (addr, retaddr);
1746 emit_call_insn (insn);
1748 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1752 /* Begin the assembly file. */
1757 default_file_start ();
1758 emit_safe_across_calls ();
1762 emit_safe_across_calls ()
1764 unsigned int rs, re;
1771 while (rs < 64 && call_used_regs[PR_REG (rs)])
1775 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1779 fputs ("\t.pred.safe_across_calls ", asm_out_file);
1783 fputc (',', asm_out_file);
1785 fprintf (asm_out_file, "p%u", rs);
1787 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1791 fputc ('\n', asm_out_file);
1794 /* Helper function for ia64_compute_frame_size: find an appropriate general
1795 register to spill some special register to. SPECIAL_SPILL_MASK contains
1796 bits in GR0 to GR31 that have already been allocated by this routine.
1797 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1800 find_gr_spill (try_locals)
1805 /* If this is a leaf function, first try an otherwise unused
1806 call-clobbered register. */
1807 if (current_function_is_leaf)
1809 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1810 if (! regs_ever_live[regno]
1811 && call_used_regs[regno]
1812 && ! fixed_regs[regno]
1813 && ! global_regs[regno]
1814 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1816 current_frame_info.gr_used_mask |= 1 << regno;
1823 regno = current_frame_info.n_local_regs;
1824 /* If there is a frame pointer, then we can't use loc79, because
1825 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1826 reg_name switching code in ia64_expand_prologue. */
1827 if (regno < (80 - frame_pointer_needed))
1829 current_frame_info.n_local_regs = regno + 1;
1830 return LOC_REG (0) + regno;
1834 /* Failed to find a general register to spill to. Must use stack. */
1838 /* In order to make for nice schedules, we try to allocate every temporary
1839 to a different register. We must of course stay away from call-saved,
1840 fixed, and global registers. We must also stay away from registers
1841 allocated in current_frame_info.gr_used_mask, since those include regs
1842 used all through the prologue.
1844 Any register allocated here must be used immediately. The idea is to
1845 aid scheduling, not to solve data flow problems. */
1847 static int last_scratch_gr_reg;
1850 next_scratch_gr_reg ()
1854 for (i = 0; i < 32; ++i)
1856 regno = (last_scratch_gr_reg + i + 1) & 31;
1857 if (call_used_regs[regno]
1858 && ! fixed_regs[regno]
1859 && ! global_regs[regno]
1860 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1862 last_scratch_gr_reg = regno;
1867 /* There must be _something_ available. */
1871 /* Helper function for ia64_compute_frame_size, called through
1872 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1875 mark_reg_gr_used_mask (reg, data)
1877 void *data ATTRIBUTE_UNUSED;
1879 unsigned int regno = REGNO (reg);
1882 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1883 for (i = 0; i < n; ++i)
1884 current_frame_info.gr_used_mask |= 1 << (regno + i);
1888 /* Returns the number of bytes offset between the frame pointer and the stack
1889 pointer for the current function. SIZE is the number of bytes of space
1890 needed for local variables. */
1893 ia64_compute_frame_size (size)
1896 HOST_WIDE_INT total_size;
1897 HOST_WIDE_INT spill_size = 0;
1898 HOST_WIDE_INT extra_spill_size = 0;
1899 HOST_WIDE_INT pretend_args_size;
1902 int spilled_gr_p = 0;
1903 int spilled_fr_p = 0;
1907 if (current_frame_info.initialized)
1910 memset (¤t_frame_info, 0, sizeof current_frame_info);
1911 CLEAR_HARD_REG_SET (mask);
1913 /* Don't allocate scratches to the return register. */
1914 diddle_return_value (mark_reg_gr_used_mask, NULL);
1916 /* Don't allocate scratches to the EH scratch registers. */
1917 if (cfun->machine->ia64_eh_epilogue_sp)
1918 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1919 if (cfun->machine->ia64_eh_epilogue_bsp)
1920 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1922 /* Find the size of the register stack frame. We have only 80 local
1923 registers, because we reserve 8 for the inputs and 8 for the
1926 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1927 since we'll be adjusting that down later. */
1928 regno = LOC_REG (78) + ! frame_pointer_needed;
1929 for (; regno >= LOC_REG (0); regno--)
1930 if (regs_ever_live[regno])
1932 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1934 /* For functions marked with the syscall_linkage attribute, we must mark
1935 all eight input registers as in use, so that locals aren't visible to
1938 if (cfun->machine->n_varargs > 0
1939 || lookup_attribute ("syscall_linkage",
1940 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1941 current_frame_info.n_input_regs = 8;
1944 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1945 if (regs_ever_live[regno])
1947 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1950 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1951 if (regs_ever_live[regno])
1953 i = regno - OUT_REG (0) + 1;
1955 /* When -p profiling, we need one output register for the mcount argument.
1956 Likewise for -a profiling for the bb_init_func argument. For -ax
1957 profiling, we need two output registers for the two bb_init_trace_func
1959 if (current_function_profile)
1961 current_frame_info.n_output_regs = i;
1963 /* ??? No rotating register support yet. */
1964 current_frame_info.n_rotate_regs = 0;
1966 /* Discover which registers need spilling, and how much room that
1967 will take. Begin with floating point and general registers,
1968 which will always wind up on the stack. */
1970 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1971 if (regs_ever_live[regno] && ! call_used_regs[regno])
1973 SET_HARD_REG_BIT (mask, regno);
1979 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1980 if (regs_ever_live[regno] && ! call_used_regs[regno])
1982 SET_HARD_REG_BIT (mask, regno);
1988 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1989 if (regs_ever_live[regno] && ! call_used_regs[regno])
1991 SET_HARD_REG_BIT (mask, regno);
1996 /* Now come all special registers that might get saved in other
1997 general registers. */
1999 if (frame_pointer_needed)
2001 current_frame_info.reg_fp = find_gr_spill (1);
2002 /* If we did not get a register, then we take LOC79. This is guaranteed
2003 to be free, even if regs_ever_live is already set, because this is
2004 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2005 as we don't count loc79 above. */
2006 if (current_frame_info.reg_fp == 0)
2008 current_frame_info.reg_fp = LOC_REG (79);
2009 current_frame_info.n_local_regs++;
2013 if (! current_function_is_leaf)
2015 /* Emit a save of BR0 if we call other functions. Do this even
2016 if this function doesn't return, as EH depends on this to be
2017 able to unwind the stack. */
2018 SET_HARD_REG_BIT (mask, BR_REG (0));
2020 current_frame_info.reg_save_b0 = find_gr_spill (1);
2021 if (current_frame_info.reg_save_b0 == 0)
2027 /* Similarly for ar.pfs. */
2028 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2029 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2030 if (current_frame_info.reg_save_ar_pfs == 0)
2032 extra_spill_size += 8;
2036 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2037 registers are clobbered, so we fall back to the stack. */
2038 current_frame_info.reg_save_gp
2039 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2040 if (current_frame_info.reg_save_gp == 0)
2042 SET_HARD_REG_BIT (mask, GR_REG (1));
2049 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2051 SET_HARD_REG_BIT (mask, BR_REG (0));
2056 if (regs_ever_live[AR_PFS_REGNUM])
2058 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2059 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2060 if (current_frame_info.reg_save_ar_pfs == 0)
2062 extra_spill_size += 8;
2068 /* Unwind descriptor hackery: things are most efficient if we allocate
2069 consecutive GR save registers for RP, PFS, FP in that order. However,
2070 it is absolutely critical that FP get the only hard register that's
2071 guaranteed to be free, so we allocated it first. If all three did
2072 happen to be allocated hard regs, and are consecutive, rearrange them
2073 into the preferred order now. */
2074 if (current_frame_info.reg_fp != 0
2075 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2076 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2078 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2079 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2080 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2083 /* See if we need to store the predicate register block. */
2084 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2085 if (regs_ever_live[regno] && ! call_used_regs[regno])
2087 if (regno <= PR_REG (63))
2089 SET_HARD_REG_BIT (mask, PR_REG (0));
2090 current_frame_info.reg_save_pr = find_gr_spill (1);
2091 if (current_frame_info.reg_save_pr == 0)
2093 extra_spill_size += 8;
2097 /* ??? Mark them all as used so that register renaming and such
2098 are free to use them. */
2099 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2100 regs_ever_live[regno] = 1;
2103 /* If we're forced to use st8.spill, we're forced to save and restore
2104 ar.unat as well. The check for existing liveness allows inline asm
2105 to touch ar.unat. */
2106 if (spilled_gr_p || cfun->machine->n_varargs
2107 || regs_ever_live[AR_UNAT_REGNUM])
2109 regs_ever_live[AR_UNAT_REGNUM] = 1;
2110 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2111 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2112 if (current_frame_info.reg_save_ar_unat == 0)
2114 extra_spill_size += 8;
2119 if (regs_ever_live[AR_LC_REGNUM])
2121 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2122 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2123 if (current_frame_info.reg_save_ar_lc == 0)
2125 extra_spill_size += 8;
2130 /* If we have an odd number of words of pretend arguments written to
2131 the stack, then the FR save area will be unaligned. We round the
2132 size of this area up to keep things 16 byte aligned. */
2134 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2136 pretend_args_size = current_function_pretend_args_size;
2138 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2139 + current_function_outgoing_args_size);
2140 total_size = IA64_STACK_ALIGN (total_size);
2142 /* We always use the 16-byte scratch area provided by the caller, but
2143 if we are a leaf function, there's no one to which we need to provide
2145 if (current_function_is_leaf)
2146 total_size = MAX (0, total_size - 16);
2148 current_frame_info.total_size = total_size;
2149 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2150 current_frame_info.spill_size = spill_size;
2151 current_frame_info.extra_spill_size = extra_spill_size;
2152 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2153 current_frame_info.n_spilled = n_spilled;
2154 current_frame_info.initialized = reload_completed;
2157 /* Compute the initial difference between the specified pair of registers. */
2160 ia64_initial_elimination_offset (from, to)
2163 HOST_WIDE_INT offset;
2165 ia64_compute_frame_size (get_frame_size ());
2168 case FRAME_POINTER_REGNUM:
2169 if (to == HARD_FRAME_POINTER_REGNUM)
2171 if (current_function_is_leaf)
2172 offset = -current_frame_info.total_size;
2174 offset = -(current_frame_info.total_size
2175 - current_function_outgoing_args_size - 16);
2177 else if (to == STACK_POINTER_REGNUM)
2179 if (current_function_is_leaf)
2182 offset = 16 + current_function_outgoing_args_size;
2188 case ARG_POINTER_REGNUM:
2189 /* Arguments start above the 16 byte save area, unless stdarg
2190 in which case we store through the 16 byte save area. */
2191 if (to == HARD_FRAME_POINTER_REGNUM)
2192 offset = 16 - current_function_pretend_args_size;
2193 else if (to == STACK_POINTER_REGNUM)
2194 offset = (current_frame_info.total_size
2195 + 16 - current_function_pretend_args_size);
2200 case RETURN_ADDRESS_POINTER_REGNUM:
2211 /* If there are more than a trivial number of register spills, we use
2212 two interleaved iterators so that we can get two memory references
2215 In order to simplify things in the prologue and epilogue expanders,
2216 we use helper functions to fix up the memory references after the
2217 fact with the appropriate offsets to a POST_MODIFY memory mode.
2218 The following data structure tracks the state of the two iterators
2219 while insns are being emitted. */
2221 struct spill_fill_data
2223 rtx init_after; /* point at which to emit initializations */
2224 rtx init_reg[2]; /* initial base register */
2225 rtx iter_reg[2]; /* the iterator registers */
2226 rtx *prev_addr[2]; /* address of last memory use */
2227 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2228 HOST_WIDE_INT prev_off[2]; /* last offset */
2229 int n_iter; /* number of iterators in use */
2230 int next_iter; /* next iterator to use */
2231 unsigned int save_gr_used_mask;
2234 static struct spill_fill_data spill_fill_data;
2237 setup_spill_pointers (n_spills, init_reg, cfa_off)
2240 HOST_WIDE_INT cfa_off;
2244 spill_fill_data.init_after = get_last_insn ();
2245 spill_fill_data.init_reg[0] = init_reg;
2246 spill_fill_data.init_reg[1] = init_reg;
2247 spill_fill_data.prev_addr[0] = NULL;
2248 spill_fill_data.prev_addr[1] = NULL;
2249 spill_fill_data.prev_insn[0] = NULL;
2250 spill_fill_data.prev_insn[1] = NULL;
2251 spill_fill_data.prev_off[0] = cfa_off;
2252 spill_fill_data.prev_off[1] = cfa_off;
2253 spill_fill_data.next_iter = 0;
2254 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2256 spill_fill_data.n_iter = 1 + (n_spills > 2);
2257 for (i = 0; i < spill_fill_data.n_iter; ++i)
2259 int regno = next_scratch_gr_reg ();
2260 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2261 current_frame_info.gr_used_mask |= 1 << regno;
2266 finish_spill_pointers ()
2268 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2272 spill_restore_mem (reg, cfa_off)
2274 HOST_WIDE_INT cfa_off;
2276 int iter = spill_fill_data.next_iter;
2277 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2278 rtx disp_rtx = GEN_INT (disp);
2281 if (spill_fill_data.prev_addr[iter])
2283 if (CONST_OK_FOR_N (disp))
2285 *spill_fill_data.prev_addr[iter]
2286 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2287 gen_rtx_PLUS (DImode,
2288 spill_fill_data.iter_reg[iter],
2290 REG_NOTES (spill_fill_data.prev_insn[iter])
2291 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2292 REG_NOTES (spill_fill_data.prev_insn[iter]));
2296 /* ??? Could use register post_modify for loads. */
2297 if (! CONST_OK_FOR_I (disp))
2299 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2300 emit_move_insn (tmp, disp_rtx);
2303 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2304 spill_fill_data.iter_reg[iter], disp_rtx));
2307 /* Micro-optimization: if we've created a frame pointer, it's at
2308 CFA 0, which may allow the real iterator to be initialized lower,
2309 slightly increasing parallelism. Also, if there are few saves
2310 it may eliminate the iterator entirely. */
2312 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2313 && frame_pointer_needed)
2315 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2316 set_mem_alias_set (mem, get_varargs_alias_set ());
2324 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2325 spill_fill_data.init_reg[iter]);
2330 if (! CONST_OK_FOR_I (disp))
2332 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2333 emit_move_insn (tmp, disp_rtx);
2337 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2338 spill_fill_data.init_reg[iter],
2345 /* Careful for being the first insn in a sequence. */
2346 if (spill_fill_data.init_after)
2347 insn = emit_insn_after (seq, spill_fill_data.init_after);
2350 rtx first = get_insns ();
2352 insn = emit_insn_before (seq, first);
2354 insn = emit_insn (seq);
2356 spill_fill_data.init_after = insn;
2358 /* If DISP is 0, we may or may not have a further adjustment
2359 afterward. If we do, then the load/store insn may be modified
2360 to be a post-modify. If we don't, then this copy may be
2361 eliminated by copyprop_hardreg_forward, which makes this
2362 insn garbage, which runs afoul of the sanity check in
2363 propagate_one_insn. So mark this insn as legal to delete. */
2365 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2369 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2371 /* ??? Not all of the spills are for varargs, but some of them are.
2372 The rest of the spills belong in an alias set of their own. But
2373 it doesn't actually hurt to include them here. */
2374 set_mem_alias_set (mem, get_varargs_alias_set ());
2376 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2377 spill_fill_data.prev_off[iter] = cfa_off;
2379 if (++iter >= spill_fill_data.n_iter)
2381 spill_fill_data.next_iter = iter;
2387 do_spill (move_fn, reg, cfa_off, frame_reg)
2388 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2390 HOST_WIDE_INT cfa_off;
2392 int iter = spill_fill_data.next_iter;
2395 mem = spill_restore_mem (reg, cfa_off);
2396 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2397 spill_fill_data.prev_insn[iter] = insn;
2404 RTX_FRAME_RELATED_P (insn) = 1;
2406 /* Don't even pretend that the unwind code can intuit its way
2407 through a pair of interleaved post_modify iterators. Just
2408 provide the correct answer. */
2410 if (frame_pointer_needed)
2412 base = hard_frame_pointer_rtx;
2417 base = stack_pointer_rtx;
2418 off = current_frame_info.total_size - cfa_off;
2422 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2423 gen_rtx_SET (VOIDmode,
2424 gen_rtx_MEM (GET_MODE (reg),
2425 plus_constant (base, off)),
2432 do_restore (move_fn, reg, cfa_off)
2433 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2435 HOST_WIDE_INT cfa_off;
2437 int iter = spill_fill_data.next_iter;
2440 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2441 GEN_INT (cfa_off)));
2442 spill_fill_data.prev_insn[iter] = insn;
2445 /* Wrapper functions that discards the CONST_INT spill offset. These
2446 exist so that we can give gr_spill/gr_fill the offset they need and
2447 use a consistent function interface. */
2450 gen_movdi_x (dest, src, offset)
2452 rtx offset ATTRIBUTE_UNUSED;
2454 return gen_movdi (dest, src);
2458 gen_fr_spill_x (dest, src, offset)
2460 rtx offset ATTRIBUTE_UNUSED;
2462 return gen_fr_spill (dest, src);
2466 gen_fr_restore_x (dest, src, offset)
2468 rtx offset ATTRIBUTE_UNUSED;
2470 return gen_fr_restore (dest, src);
2473 /* Called after register allocation to add any instructions needed for the
2474 prologue. Using a prologue insn is favored compared to putting all of the
2475 instructions in output_function_prologue(), since it allows the scheduler
2476 to intermix instructions with the saves of the caller saved registers. In
2477 some cases, it might be necessary to emit a barrier instruction as the last
2478 insn to prevent such scheduling.
2480 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2481 so that the debug info generation code can handle them properly.
2483 The register save area is layed out like so:
2485 [ varargs spill area ]
2486 [ fr register spill area ]
2487 [ br register spill area ]
2488 [ ar register spill area ]
2489 [ pr register spill area ]
2490 [ gr register spill area ] */
2492 /* ??? Get inefficient code when the frame size is larger than can fit in an
2493 adds instruction. */
2496 ia64_expand_prologue ()
2498 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2499 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2502 ia64_compute_frame_size (get_frame_size ());
2503 last_scratch_gr_reg = 15;
2505 /* If there is no epilogue, then we don't need some prologue insns.
2506 We need to avoid emitting the dead prologue insns, because flow
2507 will complain about them. */
2512 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2513 if ((e->flags & EDGE_FAKE) == 0
2514 && (e->flags & EDGE_FALLTHRU) != 0)
2516 epilogue_p = (e != NULL);
2521 /* Set the local, input, and output register names. We need to do this
2522 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2523 half. If we use in/loc/out register names, then we get assembler errors
2524 in crtn.S because there is no alloc insn or regstk directive in there. */
2525 if (! TARGET_REG_NAMES)
2527 int inputs = current_frame_info.n_input_regs;
2528 int locals = current_frame_info.n_local_regs;
2529 int outputs = current_frame_info.n_output_regs;
2531 for (i = 0; i < inputs; i++)
2532 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2533 for (i = 0; i < locals; i++)
2534 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2535 for (i = 0; i < outputs; i++)
2536 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2539 /* Set the frame pointer register name. The regnum is logically loc79,
2540 but of course we'll not have allocated that many locals. Rather than
2541 worrying about renumbering the existing rtxs, we adjust the name. */
2542 /* ??? This code means that we can never use one local register when
2543 there is a frame pointer. loc79 gets wasted in this case, as it is
2544 renamed to a register that will never be used. See also the try_locals
2545 code in find_gr_spill. */
2546 if (current_frame_info.reg_fp)
2548 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2549 reg_names[HARD_FRAME_POINTER_REGNUM]
2550 = reg_names[current_frame_info.reg_fp];
2551 reg_names[current_frame_info.reg_fp] = tmp;
2554 /* Fix up the return address placeholder. */
2555 /* ??? We can fail if __builtin_return_address is used, and we didn't
2556 allocate a register in which to save b0. I can't think of a way to
2557 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2558 then be sure that I got the right one. Further, reload doesn't seem
2559 to care if an eliminable register isn't used, and "eliminates" it
2561 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2562 && current_frame_info.reg_save_b0 != 0)
2563 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2565 /* We don't need an alloc instruction if we've used no outputs or locals. */
2566 if (current_frame_info.n_local_regs == 0
2567 && current_frame_info.n_output_regs == 0
2568 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2569 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2571 /* If there is no alloc, but there are input registers used, then we
2572 need a .regstk directive. */
2573 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2574 ar_pfs_save_reg = NULL_RTX;
2578 current_frame_info.need_regstk = 0;
2580 if (current_frame_info.reg_save_ar_pfs)
2581 regno = current_frame_info.reg_save_ar_pfs;
2583 regno = next_scratch_gr_reg ();
2584 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2586 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2587 GEN_INT (current_frame_info.n_input_regs),
2588 GEN_INT (current_frame_info.n_local_regs),
2589 GEN_INT (current_frame_info.n_output_regs),
2590 GEN_INT (current_frame_info.n_rotate_regs)));
2591 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2594 /* Set up frame pointer, stack pointer, and spill iterators. */
2596 n_varargs = cfun->machine->n_varargs;
2597 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2598 stack_pointer_rtx, 0);
2600 if (frame_pointer_needed)
2602 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2603 RTX_FRAME_RELATED_P (insn) = 1;
2606 if (current_frame_info.total_size != 0)
2608 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2611 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2612 offset = frame_size_rtx;
2615 regno = next_scratch_gr_reg ();
2616 offset = gen_rtx_REG (DImode, regno);
2617 emit_move_insn (offset, frame_size_rtx);
2620 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2621 stack_pointer_rtx, offset));
2623 if (! frame_pointer_needed)
2625 RTX_FRAME_RELATED_P (insn) = 1;
2626 if (GET_CODE (offset) != CONST_INT)
2629 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2630 gen_rtx_SET (VOIDmode,
2632 gen_rtx_PLUS (DImode,
2639 /* ??? At this point we must generate a magic insn that appears to
2640 modify the stack pointer, the frame pointer, and all spill
2641 iterators. This would allow the most scheduling freedom. For
2642 now, just hard stop. */
2643 emit_insn (gen_blockage ());
2646 /* Must copy out ar.unat before doing any integer spills. */
2647 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2649 if (current_frame_info.reg_save_ar_unat)
2651 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2654 alt_regno = next_scratch_gr_reg ();
2655 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2656 current_frame_info.gr_used_mask |= 1 << alt_regno;
2659 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2660 insn = emit_move_insn (ar_unat_save_reg, reg);
2661 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2663 /* Even if we're not going to generate an epilogue, we still
2664 need to save the register so that EH works. */
2665 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2666 emit_insn (gen_prologue_use (ar_unat_save_reg));
2669 ar_unat_save_reg = NULL_RTX;
2671 /* Spill all varargs registers. Do this before spilling any GR registers,
2672 since we want the UNAT bits for the GR registers to override the UNAT
2673 bits from varargs, which we don't care about. */
2676 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2678 reg = gen_rtx_REG (DImode, regno);
2679 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2682 /* Locate the bottom of the register save area. */
2683 cfa_off = (current_frame_info.spill_cfa_off
2684 + current_frame_info.spill_size
2685 + current_frame_info.extra_spill_size);
2687 /* Save the predicate register block either in a register or in memory. */
2688 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2690 reg = gen_rtx_REG (DImode, PR_REG (0));
2691 if (current_frame_info.reg_save_pr != 0)
2693 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2694 insn = emit_move_insn (alt_reg, reg);
2696 /* ??? Denote pr spill/fill by a DImode move that modifies all
2697 64 hard registers. */
2698 RTX_FRAME_RELATED_P (insn) = 1;
2700 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2701 gen_rtx_SET (VOIDmode, alt_reg, reg),
2704 /* Even if we're not going to generate an epilogue, we still
2705 need to save the register so that EH works. */
2707 emit_insn (gen_prologue_use (alt_reg));
2711 alt_regno = next_scratch_gr_reg ();
2712 alt_reg = gen_rtx_REG (DImode, alt_regno);
2713 insn = emit_move_insn (alt_reg, reg);
2714 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2719 /* Handle AR regs in numerical order. All of them get special handling. */
2720 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2721 && current_frame_info.reg_save_ar_unat == 0)
2723 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2724 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2728 /* The alloc insn already copied ar.pfs into a general register. The
2729 only thing we have to do now is copy that register to a stack slot
2730 if we'd not allocated a local register for the job. */
2731 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2732 && current_frame_info.reg_save_ar_pfs == 0)
2734 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2735 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2739 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2741 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2742 if (current_frame_info.reg_save_ar_lc != 0)
2744 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2745 insn = emit_move_insn (alt_reg, reg);
2746 RTX_FRAME_RELATED_P (insn) = 1;
2748 /* Even if we're not going to generate an epilogue, we still
2749 need to save the register so that EH works. */
2751 emit_insn (gen_prologue_use (alt_reg));
2755 alt_regno = next_scratch_gr_reg ();
2756 alt_reg = gen_rtx_REG (DImode, alt_regno);
2757 emit_move_insn (alt_reg, reg);
2758 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2763 if (current_frame_info.reg_save_gp)
2765 insn = emit_move_insn (gen_rtx_REG (DImode,
2766 current_frame_info.reg_save_gp),
2767 pic_offset_table_rtx);
2768 /* We don't know for sure yet if this is actually needed, since
2769 we've not split the PIC call patterns. If all of the calls
2770 are indirect, and not followed by any uses of the gp, then
2771 this save is dead. Allow it to go away. */
2773 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2776 /* We should now be at the base of the gr/br/fr spill area. */
2777 if (cfa_off != (current_frame_info.spill_cfa_off
2778 + current_frame_info.spill_size))
2781 /* Spill all general registers. */
2782 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2783 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2785 reg = gen_rtx_REG (DImode, regno);
2786 do_spill (gen_gr_spill, reg, cfa_off, reg);
2790 /* Handle BR0 specially -- it may be getting stored permanently in
2791 some GR register. */
2792 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2794 reg = gen_rtx_REG (DImode, BR_REG (0));
2795 if (current_frame_info.reg_save_b0 != 0)
2797 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2798 insn = emit_move_insn (alt_reg, reg);
2799 RTX_FRAME_RELATED_P (insn) = 1;
2801 /* Even if we're not going to generate an epilogue, we still
2802 need to save the register so that EH works. */
2804 emit_insn (gen_prologue_use (alt_reg));
2808 alt_regno = next_scratch_gr_reg ();
2809 alt_reg = gen_rtx_REG (DImode, alt_regno);
2810 emit_move_insn (alt_reg, reg);
2811 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2816 /* Spill the rest of the BR registers. */
2817 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2818 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2820 alt_regno = next_scratch_gr_reg ();
2821 alt_reg = gen_rtx_REG (DImode, alt_regno);
2822 reg = gen_rtx_REG (DImode, regno);
2823 emit_move_insn (alt_reg, reg);
2824 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2828 /* Align the frame and spill all FR registers. */
2829 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2830 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2834 reg = gen_rtx_REG (TFmode, regno);
2835 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2839 if (cfa_off != current_frame_info.spill_cfa_off)
2842 finish_spill_pointers ();
2845 /* Called after register allocation to add any instructions needed for the
2846 epilogue. Using an epilogue insn is favored compared to putting all of the
2847 instructions in output_function_prologue(), since it allows the scheduler
2848 to intermix instructions with the saves of the caller saved registers. In
2849 some cases, it might be necessary to emit a barrier instruction as the last
2850 insn to prevent such scheduling. */
2853 ia64_expand_epilogue (sibcall_p)
2856 rtx insn, reg, alt_reg, ar_unat_save_reg;
2857 int regno, alt_regno, cfa_off;
2859 ia64_compute_frame_size (get_frame_size ());
2861 /* If there is a frame pointer, then we use it instead of the stack
2862 pointer, so that the stack pointer does not need to be valid when
2863 the epilogue starts. See EXIT_IGNORE_STACK. */
2864 if (frame_pointer_needed)
2865 setup_spill_pointers (current_frame_info.n_spilled,
2866 hard_frame_pointer_rtx, 0);
2868 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2869 current_frame_info.total_size);
2871 if (current_frame_info.total_size != 0)
2873 /* ??? At this point we must generate a magic insn that appears to
2874 modify the spill iterators and the frame pointer. This would
2875 allow the most scheduling freedom. For now, just hard stop. */
2876 emit_insn (gen_blockage ());
2879 /* Locate the bottom of the register save area. */
2880 cfa_off = (current_frame_info.spill_cfa_off
2881 + current_frame_info.spill_size
2882 + current_frame_info.extra_spill_size);
2884 /* Restore the predicate registers. */
2885 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2887 if (current_frame_info.reg_save_pr != 0)
2888 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2891 alt_regno = next_scratch_gr_reg ();
2892 alt_reg = gen_rtx_REG (DImode, alt_regno);
2893 do_restore (gen_movdi_x, alt_reg, cfa_off);
2896 reg = gen_rtx_REG (DImode, PR_REG (0));
2897 emit_move_insn (reg, alt_reg);
2900 /* Restore the application registers. */
2902 /* Load the saved unat from the stack, but do not restore it until
2903 after the GRs have been restored. */
2904 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2906 if (current_frame_info.reg_save_ar_unat != 0)
2908 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2911 alt_regno = next_scratch_gr_reg ();
2912 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2913 current_frame_info.gr_used_mask |= 1 << alt_regno;
2914 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2919 ar_unat_save_reg = NULL_RTX;
2921 if (current_frame_info.reg_save_ar_pfs != 0)
2923 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2924 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2925 emit_move_insn (reg, alt_reg);
2927 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2929 alt_regno = next_scratch_gr_reg ();
2930 alt_reg = gen_rtx_REG (DImode, alt_regno);
2931 do_restore (gen_movdi_x, alt_reg, cfa_off);
2933 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2934 emit_move_insn (reg, alt_reg);
2937 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2939 if (current_frame_info.reg_save_ar_lc != 0)
2940 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2943 alt_regno = next_scratch_gr_reg ();
2944 alt_reg = gen_rtx_REG (DImode, alt_regno);
2945 do_restore (gen_movdi_x, alt_reg, cfa_off);
2948 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2949 emit_move_insn (reg, alt_reg);
2952 /* We should now be at the base of the gr/br/fr spill area. */
2953 if (cfa_off != (current_frame_info.spill_cfa_off
2954 + current_frame_info.spill_size))
2957 /* The GP may be stored on the stack in the prologue, but it's
2958 never restored in the epilogue. Skip the stack slot. */
2959 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2962 /* Restore all general registers. */
2963 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2964 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2966 reg = gen_rtx_REG (DImode, regno);
2967 do_restore (gen_gr_restore, reg, cfa_off);
2971 /* Restore the branch registers. Handle B0 specially, as it may
2972 have gotten stored in some GR register. */
2973 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2975 if (current_frame_info.reg_save_b0 != 0)
2976 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2979 alt_regno = next_scratch_gr_reg ();
2980 alt_reg = gen_rtx_REG (DImode, alt_regno);
2981 do_restore (gen_movdi_x, alt_reg, cfa_off);
2984 reg = gen_rtx_REG (DImode, BR_REG (0));
2985 emit_move_insn (reg, alt_reg);
2988 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2989 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2991 alt_regno = next_scratch_gr_reg ();
2992 alt_reg = gen_rtx_REG (DImode, alt_regno);
2993 do_restore (gen_movdi_x, alt_reg, cfa_off);
2995 reg = gen_rtx_REG (DImode, regno);
2996 emit_move_insn (reg, alt_reg);
2999 /* Restore floating point registers. */
3000 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3001 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3005 reg = gen_rtx_REG (TFmode, regno);
3006 do_restore (gen_fr_restore_x, reg, cfa_off);
3010 /* Restore ar.unat for real. */
3011 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3013 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3014 emit_move_insn (reg, ar_unat_save_reg);
3017 if (cfa_off != current_frame_info.spill_cfa_off)
3020 finish_spill_pointers ();
3022 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3024 /* ??? At this point we must generate a magic insn that appears to
3025 modify the spill iterators, the stack pointer, and the frame
3026 pointer. This would allow the most scheduling freedom. For now,
3028 emit_insn (gen_blockage ());
3031 if (cfun->machine->ia64_eh_epilogue_sp)
3032 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3033 else if (frame_pointer_needed)
3035 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3036 RTX_FRAME_RELATED_P (insn) = 1;
3038 else if (current_frame_info.total_size)
3040 rtx offset, frame_size_rtx;
3042 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3043 if (CONST_OK_FOR_I (current_frame_info.total_size))
3044 offset = frame_size_rtx;
3047 regno = next_scratch_gr_reg ();
3048 offset = gen_rtx_REG (DImode, regno);
3049 emit_move_insn (offset, frame_size_rtx);
3052 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3055 RTX_FRAME_RELATED_P (insn) = 1;
3056 if (GET_CODE (offset) != CONST_INT)
3059 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3060 gen_rtx_SET (VOIDmode,
3062 gen_rtx_PLUS (DImode,
3069 if (cfun->machine->ia64_eh_epilogue_bsp)
3070 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3073 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3076 int fp = GR_REG (2);
3077 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3078 first available call clobbered register. If there was a frame_pointer
3079 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3080 so we have to make sure we're using the string "r2" when emitting
3081 the register name for the assembler. */
3082 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3083 fp = HARD_FRAME_POINTER_REGNUM;
3085 /* We must emit an alloc to force the input registers to become output
3086 registers. Otherwise, if the callee tries to pass its parameters
3087 through to another call without an intervening alloc, then these
3089 /* ??? We don't need to preserve all input registers. We only need to
3090 preserve those input registers used as arguments to the sibling call.
3091 It is unclear how to compute that number here. */
3092 if (current_frame_info.n_input_regs != 0)
3093 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3094 GEN_INT (0), GEN_INT (0),
3095 GEN_INT (current_frame_info.n_input_regs),
3100 /* Return 1 if br.ret can do all the work required to return from a
3104 ia64_direct_return ()
3106 if (reload_completed && ! frame_pointer_needed)
3108 ia64_compute_frame_size (get_frame_size ());
3110 return (current_frame_info.total_size == 0
3111 && current_frame_info.n_spilled == 0
3112 && current_frame_info.reg_save_b0 == 0
3113 && current_frame_info.reg_save_pr == 0
3114 && current_frame_info.reg_save_ar_pfs == 0
3115 && current_frame_info.reg_save_ar_unat == 0
3116 && current_frame_info.reg_save_ar_lc == 0);
3122 ia64_hard_regno_rename_ok (from, to)
3126 /* Don't clobber any of the registers we reserved for the prologue. */
3127 if (to == current_frame_info.reg_fp
3128 || to == current_frame_info.reg_save_b0
3129 || to == current_frame_info.reg_save_pr
3130 || to == current_frame_info.reg_save_ar_pfs
3131 || to == current_frame_info.reg_save_ar_unat
3132 || to == current_frame_info.reg_save_ar_lc)
3135 if (from == current_frame_info.reg_fp
3136 || from == current_frame_info.reg_save_b0
3137 || from == current_frame_info.reg_save_pr
3138 || from == current_frame_info.reg_save_ar_pfs
3139 || from == current_frame_info.reg_save_ar_unat
3140 || from == current_frame_info.reg_save_ar_lc)
3143 /* Don't use output registers outside the register frame. */
3144 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3147 /* Retain even/oddness on predicate register pairs. */
3148 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3149 return (from & 1) == (to & 1);
3154 /* Target hook for assembling integer objects. Handle word-sized
3155 aligned objects and detect the cases when @fptr is needed. */
3158 ia64_assemble_integer (x, size, aligned_p)
3163 if (size == (TARGET_ILP32 ? 4 : 8)
3165 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3166 && GET_CODE (x) == SYMBOL_REF
3167 && SYMBOL_REF_FUNCTION_P (x))
3170 fputs ("\tdata4\t@fptr(", asm_out_file);
3172 fputs ("\tdata8\t@fptr(", asm_out_file);
3173 output_addr_const (asm_out_file, x);
3174 fputs (")\n", asm_out_file);
3177 return default_assemble_integer (x, size, aligned_p);
3180 /* Emit the function prologue. */
3183 ia64_output_function_prologue (file, size)
3185 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3187 int mask, grsave, grsave_prev;
3189 if (current_frame_info.need_regstk)
3190 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3191 current_frame_info.n_input_regs,
3192 current_frame_info.n_local_regs,
3193 current_frame_info.n_output_regs,
3194 current_frame_info.n_rotate_regs);
3196 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3199 /* Emit the .prologue directive. */
3202 grsave = grsave_prev = 0;
3203 if (current_frame_info.reg_save_b0 != 0)
3206 grsave = grsave_prev = current_frame_info.reg_save_b0;
3208 if (current_frame_info.reg_save_ar_pfs != 0
3209 && (grsave_prev == 0
3210 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3213 if (grsave_prev == 0)
3214 grsave = current_frame_info.reg_save_ar_pfs;
3215 grsave_prev = current_frame_info.reg_save_ar_pfs;
3217 if (current_frame_info.reg_fp != 0
3218 && (grsave_prev == 0
3219 || current_frame_info.reg_fp == grsave_prev + 1))
3222 if (grsave_prev == 0)
3223 grsave = HARD_FRAME_POINTER_REGNUM;
3224 grsave_prev = current_frame_info.reg_fp;
3226 if (current_frame_info.reg_save_pr != 0
3227 && (grsave_prev == 0
3228 || current_frame_info.reg_save_pr == grsave_prev + 1))
3231 if (grsave_prev == 0)
3232 grsave = current_frame_info.reg_save_pr;
3236 fprintf (file, "\t.prologue %d, %d\n", mask,
3237 ia64_dbx_register_number (grsave));
3239 fputs ("\t.prologue\n", file);
3241 /* Emit a .spill directive, if necessary, to relocate the base of
3242 the register spill area. */
3243 if (current_frame_info.spill_cfa_off != -16)
3244 fprintf (file, "\t.spill %ld\n",
3245 (long) (current_frame_info.spill_cfa_off
3246 + current_frame_info.spill_size));
3249 /* Emit the .body directive at the scheduled end of the prologue. */
3252 ia64_output_function_end_prologue (file)
3255 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3258 fputs ("\t.body\n", file);
3261 /* Emit the function epilogue. */
3264 ia64_output_function_epilogue (file, size)
3265 FILE *file ATTRIBUTE_UNUSED;
3266 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3270 /* Reset from the function's potential modifications. */
3271 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
3273 if (current_frame_info.reg_fp)
3275 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3276 reg_names[HARD_FRAME_POINTER_REGNUM]
3277 = reg_names[current_frame_info.reg_fp];
3278 reg_names[current_frame_info.reg_fp] = tmp;
3280 if (! TARGET_REG_NAMES)
3282 for (i = 0; i < current_frame_info.n_input_regs; i++)
3283 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3284 for (i = 0; i < current_frame_info.n_local_regs; i++)
3285 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3286 for (i = 0; i < current_frame_info.n_output_regs; i++)
3287 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3290 current_frame_info.initialized = 0;
3294 ia64_dbx_register_number (regno)
3297 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3298 from its home at loc79 to something inside the register frame. We
3299 must perform the same renumbering here for the debug info. */
3300 if (current_frame_info.reg_fp)
3302 if (regno == HARD_FRAME_POINTER_REGNUM)
3303 regno = current_frame_info.reg_fp;
3304 else if (regno == current_frame_info.reg_fp)
3305 regno = HARD_FRAME_POINTER_REGNUM;
3308 if (IN_REGNO_P (regno))
3309 return 32 + regno - IN_REG (0);
3310 else if (LOC_REGNO_P (regno))
3311 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3312 else if (OUT_REGNO_P (regno))
3313 return (32 + current_frame_info.n_input_regs
3314 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3320 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3321 rtx addr, fnaddr, static_chain;
3323 rtx addr_reg, eight = GEN_INT (8);
3325 /* Load up our iterator. */
3326 addr_reg = gen_reg_rtx (Pmode);
3327 emit_move_insn (addr_reg, addr);
3329 /* The first two words are the fake descriptor:
3330 __ia64_trampoline, ADDR+16. */
3331 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3332 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3333 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3335 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3336 copy_to_reg (plus_constant (addr, 16)));
3337 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3339 /* The third word is the target descriptor. */
3340 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3341 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3343 /* The fourth word is the static chain. */
3344 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3347 /* Do any needed setup for a variadic function. CUM has not been updated
3348 for the last named argument which has type TYPE and mode MODE.
3350 We generate the actual spill instructions during prologue generation. */
3353 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3354 CUMULATIVE_ARGS cum;
3358 int second_time ATTRIBUTE_UNUSED;
3360 /* Skip the current argument. */
3361 ia64_function_arg_advance (&cum, int_mode, type, 1);
3363 if (cum.words < MAX_ARGUMENT_SLOTS)
3365 int n = MAX_ARGUMENT_SLOTS - cum.words;
3366 *pretend_size = n * UNITS_PER_WORD;
3367 cfun->machine->n_varargs = n;
3371 /* Check whether TYPE is a homogeneous floating point aggregate. If
3372 it is, return the mode of the floating point type that appears
3373 in all leafs. If it is not, return VOIDmode.
3375 An aggregate is a homogeneous floating point aggregate is if all
3376 fields/elements in it have the same floating point type (e.g,
3377 SFmode). 128-bit quad-precision floats are excluded. */
3379 static enum machine_mode
3380 hfa_element_mode (type, nested)
3384 enum machine_mode element_mode = VOIDmode;
3385 enum machine_mode mode;
3386 enum tree_code code = TREE_CODE (type);
3387 int know_element_mode = 0;
3392 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3393 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3394 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3395 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3399 /* Fortran complex types are supposed to be HFAs, so we need to handle
3400 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3403 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3404 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3405 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3406 * BITS_PER_UNIT, MODE_FLOAT, 0);
3411 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3412 mode if this is contained within an aggregate. */
3413 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3414 return TYPE_MODE (type);
3419 return hfa_element_mode (TREE_TYPE (type), 1);
3423 case QUAL_UNION_TYPE:
3424 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3426 if (TREE_CODE (t) != FIELD_DECL)
3429 mode = hfa_element_mode (TREE_TYPE (t), 1);
3430 if (know_element_mode)
3432 if (mode != element_mode)
3435 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3439 know_element_mode = 1;
3440 element_mode = mode;
3443 return element_mode;
3446 /* If we reach here, we probably have some front-end specific type
3447 that the backend doesn't know about. This can happen via the
3448 aggregate_value_p call in init_function_start. All we can do is
3449 ignore unknown tree types. */
3456 /* Return rtx for register where argument is passed, or zero if it is passed
3459 /* ??? 128-bit quad-precision floats are always passed in general
3463 ia64_function_arg (cum, mode, type, named, incoming)
3464 CUMULATIVE_ARGS *cum;
3465 enum machine_mode mode;
3470 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3471 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3472 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3475 enum machine_mode hfa_mode = VOIDmode;
3477 /* Integer and float arguments larger than 8 bytes start at the next even
3478 boundary. Aggregates larger than 8 bytes start at the next even boundary
3479 if the aggregate has 16 byte alignment. Net effect is that types with
3480 alignment greater than 8 start at the next even boundary. */
3481 /* ??? The ABI does not specify how to handle aggregates with alignment from
3482 9 to 15 bytes, or greater than 16. We handle them all as if they had
3483 16 byte alignment. Such aggregates can occur only if gcc extensions are
3485 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3487 && (cum->words & 1))
3490 /* If all argument slots are used, then it must go on the stack. */
3491 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3494 /* Check for and handle homogeneous FP aggregates. */
3496 hfa_mode = hfa_element_mode (type, 0);
3498 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3499 and unprototyped hfas are passed specially. */
3500 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3504 int fp_regs = cum->fp_regs;
3505 int int_regs = cum->words + offset;
3506 int hfa_size = GET_MODE_SIZE (hfa_mode);
3510 /* If prototyped, pass it in FR regs then GR regs.
3511 If not prototyped, pass it in both FR and GR regs.
3513 If this is an SFmode aggregate, then it is possible to run out of
3514 FR regs while GR regs are still left. In that case, we pass the
3515 remaining part in the GR regs. */
3517 /* Fill the FP regs. We do this always. We stop if we reach the end
3518 of the argument, the last FP register, or the last argument slot. */
3520 byte_size = ((mode == BLKmode)
3521 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3522 args_byte_size = int_regs * UNITS_PER_WORD;
3524 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3525 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3527 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3528 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3532 args_byte_size += hfa_size;
3536 /* If no prototype, then the whole thing must go in GR regs. */
3537 if (! cum->prototype)
3539 /* If this is an SFmode aggregate, then we might have some left over
3540 that needs to go in GR regs. */
3541 else if (byte_size != offset)
3542 int_regs += offset / UNITS_PER_WORD;
3544 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3546 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3548 enum machine_mode gr_mode = DImode;
3550 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3551 then this goes in a GR reg left adjusted/little endian, right
3552 adjusted/big endian. */
3553 /* ??? Currently this is handled wrong, because 4-byte hunks are
3554 always right adjusted/little endian. */
3557 /* If we have an even 4 byte hunk because the aggregate is a
3558 multiple of 4 bytes in size, then this goes in a GR reg right
3559 adjusted/little endian. */
3560 else if (byte_size - offset == 4)
3562 /* Complex floats need to have float mode. */
3563 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3566 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3567 gen_rtx_REG (gr_mode, (basereg
3570 offset += GET_MODE_SIZE (gr_mode);
3571 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3572 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3575 /* If we ended up using just one location, just return that one loc. */
3577 return XEXP (loc[0], 0);
3579 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3582 /* Integral and aggregates go in general registers. If we have run out of
3583 FR registers, then FP values must also go in general registers. This can
3584 happen when we have a SFmode HFA. */
3585 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3586 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3588 int byte_size = ((mode == BLKmode)
3589 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3590 if (BYTES_BIG_ENDIAN
3591 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3592 && byte_size < UNITS_PER_WORD
3595 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3596 gen_rtx_REG (DImode,
3597 (basereg + cum->words
3600 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3603 return gen_rtx_REG (mode, basereg + cum->words + offset);
3607 /* If there is a prototype, then FP values go in a FR register when
3608 named, and in a GR register when unnamed. */
3609 else if (cum->prototype)
3612 return gen_rtx_REG (mode, basereg + cum->words + offset);
3614 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3616 /* If there is no prototype, then FP values go in both FR and GR
3620 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3621 gen_rtx_REG (mode, (FR_ARG_FIRST
3624 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3626 (basereg + cum->words
3630 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3634 /* Return number of words, at the beginning of the argument, that must be
3635 put in registers. 0 is the argument is entirely in registers or entirely
3639 ia64_function_arg_partial_nregs (cum, mode, type, named)
3640 CUMULATIVE_ARGS *cum;
3641 enum machine_mode mode;
3643 int named ATTRIBUTE_UNUSED;
3645 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3646 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3650 /* Arguments with alignment larger than 8 bytes start at the next even
3652 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3654 && (cum->words & 1))
3657 /* If all argument slots are used, then it must go on the stack. */
3658 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3661 /* It doesn't matter whether the argument goes in FR or GR regs. If
3662 it fits within the 8 argument slots, then it goes entirely in
3663 registers. If it extends past the last argument slot, then the rest
3664 goes on the stack. */
3666 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3669 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3672 /* Update CUM to point after this argument. This is patterned after
3673 ia64_function_arg. */
3676 ia64_function_arg_advance (cum, mode, type, named)
3677 CUMULATIVE_ARGS *cum;
3678 enum machine_mode mode;
3682 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3683 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3686 enum machine_mode hfa_mode = VOIDmode;
3688 /* If all arg slots are already full, then there is nothing to do. */
3689 if (cum->words >= MAX_ARGUMENT_SLOTS)
3692 /* Arguments with alignment larger than 8 bytes start at the next even
3694 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3696 && (cum->words & 1))
3699 cum->words += words + offset;
3701 /* Check for and handle homogeneous FP aggregates. */
3703 hfa_mode = hfa_element_mode (type, 0);
3705 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3706 and unprototyped hfas are passed specially. */
3707 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3709 int fp_regs = cum->fp_regs;
3710 /* This is the original value of cum->words + offset. */
3711 int int_regs = cum->words - words;
3712 int hfa_size = GET_MODE_SIZE (hfa_mode);
3716 /* If prototyped, pass it in FR regs then GR regs.
3717 If not prototyped, pass it in both FR and GR regs.
3719 If this is an SFmode aggregate, then it is possible to run out of
3720 FR regs while GR regs are still left. In that case, we pass the
3721 remaining part in the GR regs. */
3723 /* Fill the FP regs. We do this always. We stop if we reach the end
3724 of the argument, the last FP register, or the last argument slot. */
3726 byte_size = ((mode == BLKmode)
3727 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3728 args_byte_size = int_regs * UNITS_PER_WORD;
3730 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3731 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3734 args_byte_size += hfa_size;
3738 cum->fp_regs = fp_regs;
3741 /* Integral and aggregates go in general registers. If we have run out of
3742 FR registers, then FP values must also go in general registers. This can
3743 happen when we have a SFmode HFA. */
3744 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3745 cum->int_regs = cum->words;
3747 /* If there is a prototype, then FP values go in a FR register when
3748 named, and in a GR register when unnamed. */
3749 else if (cum->prototype)
3752 cum->int_regs = cum->words;
3754 /* ??? Complex types should not reach here. */
3755 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3757 /* If there is no prototype, then FP values go in both FR and GR
3761 /* ??? Complex types should not reach here. */
3762 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3763 cum->int_regs = cum->words;
3767 /* Variable sized types are passed by reference. */
3768 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3771 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3772 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3773 enum machine_mode mode ATTRIBUTE_UNUSED;
3775 int named ATTRIBUTE_UNUSED;
3777 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3780 /* True if it is OK to do sibling call optimization for the specified
3781 call expression EXP. DECL will be the called function, or NULL if
3782 this is an indirect call. */
3784 ia64_function_ok_for_sibcall (decl, exp)
3786 tree exp ATTRIBUTE_UNUSED;
3788 /* We must always return with our current GP. This means we can
3789 only sibcall to functions defined in the current module. */
3790 return decl && (*targetm.binds_local_p) (decl);
3794 /* Implement va_arg. */
3797 ia64_va_arg (valist, type)
3802 /* Variable sized types are passed by reference. */
3803 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3805 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3806 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3809 /* Arguments with alignment larger than 8 bytes start at the next even
3811 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3813 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3814 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3815 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3816 build_int_2 (-2 * UNITS_PER_WORD, -1));
3817 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3818 TREE_SIDE_EFFECTS (t) = 1;
3819 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3822 return std_expand_builtin_va_arg (valist, type);
3825 /* Return 1 if function return value returned in memory. Return 0 if it is
3829 ia64_return_in_memory (valtype)
3832 enum machine_mode mode;
3833 enum machine_mode hfa_mode;
3834 HOST_WIDE_INT byte_size;
3836 mode = TYPE_MODE (valtype);
3837 byte_size = GET_MODE_SIZE (mode);
3838 if (mode == BLKmode)
3840 byte_size = int_size_in_bytes (valtype);
3845 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3847 hfa_mode = hfa_element_mode (valtype, 0);
3848 if (hfa_mode != VOIDmode)
3850 int hfa_size = GET_MODE_SIZE (hfa_mode);
3852 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3857 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3863 /* Return rtx for register that holds the function return value. */
3866 ia64_function_value (valtype, func)
3868 tree func ATTRIBUTE_UNUSED;
3870 enum machine_mode mode;
3871 enum machine_mode hfa_mode;
3873 mode = TYPE_MODE (valtype);
3874 hfa_mode = hfa_element_mode (valtype, 0);
3876 if (hfa_mode != VOIDmode)
3884 hfa_size = GET_MODE_SIZE (hfa_mode);
3885 byte_size = ((mode == BLKmode)
3886 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3888 for (i = 0; offset < byte_size; i++)
3890 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3891 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3897 return XEXP (loc[0], 0);
3899 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3901 else if (FLOAT_TYPE_P (valtype) &&
3902 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3903 return gen_rtx_REG (mode, FR_ARG_FIRST);
3906 if (BYTES_BIG_ENDIAN
3907 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3915 bytesize = int_size_in_bytes (valtype);
3916 for (i = 0; offset < bytesize; i++)
3918 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3919 gen_rtx_REG (DImode,
3922 offset += UNITS_PER_WORD;
3924 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3927 return gen_rtx_REG (mode, GR_RET_FIRST);
3931 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3932 We need to emit DTP-relative relocations. */
3935 ia64_output_dwarf_dtprel (file, size, x)
3942 fputs ("\tdata8.ua\t@dtprel(", file);
3943 output_addr_const (file, x);
3947 /* Print a memory address as an operand to reference that memory location. */
3949 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3950 also call this from ia64_print_operand for memory addresses. */
3953 ia64_print_operand_address (stream, address)
3954 FILE * stream ATTRIBUTE_UNUSED;
3955 rtx address ATTRIBUTE_UNUSED;
3959 /* Print an operand to an assembler instruction.
3960 C Swap and print a comparison operator.
3961 D Print an FP comparison operator.
3962 E Print 32 - constant, for SImode shifts as extract.
3963 e Print 64 - constant, for DImode rotates.
3964 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3965 a floating point register emitted normally.
3966 I Invert a predicate register by adding 1.
3967 J Select the proper predicate register for a condition.
3968 j Select the inverse predicate register for a condition.
3969 O Append .acq for volatile load.
3970 P Postincrement of a MEM.
3971 Q Append .rel for volatile store.
3972 S Shift amount for shladd instruction.
3973 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3974 for Intel assembler.
3975 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3976 for Intel assembler.
3977 r Print register name, or constant 0 as r0. HP compatibility for
3980 ia64_print_operand (file, x, code)
3990 /* Handled below. */
3995 enum rtx_code c = swap_condition (GET_CODE (x));
3996 fputs (GET_RTX_NAME (c), file);
4001 switch (GET_CODE (x))
4013 str = GET_RTX_NAME (GET_CODE (x));
4020 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4024 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4028 if (x == CONST0_RTX (GET_MODE (x)))
4029 str = reg_names [FR_REG (0)];
4030 else if (x == CONST1_RTX (GET_MODE (x)))
4031 str = reg_names [FR_REG (1)];
4032 else if (GET_CODE (x) == REG)
4033 str = reg_names [REGNO (x)];
4040 fputs (reg_names [REGNO (x) + 1], file);
4046 unsigned int regno = REGNO (XEXP (x, 0));
4047 if (GET_CODE (x) == EQ)
4051 fputs (reg_names [regno], file);
4056 if (MEM_VOLATILE_P (x))
4057 fputs(".acq", file);
4062 HOST_WIDE_INT value;
4064 switch (GET_CODE (XEXP (x, 0)))
4070 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4071 if (GET_CODE (x) == CONST_INT)
4073 else if (GET_CODE (x) == REG)
4075 fprintf (file, ", %s", reg_names[REGNO (x)]);
4083 value = GET_MODE_SIZE (GET_MODE (x));
4087 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4091 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4096 if (MEM_VOLATILE_P (x))
4097 fputs(".rel", file);
4101 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4105 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4107 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4113 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4115 const char *prefix = "0x";
4116 if (INTVAL (x) & 0x80000000)
4118 fprintf (file, "0xffffffff");
4121 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4127 /* If this operand is the constant zero, write it as register zero.
4128 Any register, zero, or CONST_INT value is OK here. */
4129 if (GET_CODE (x) == REG)
4130 fputs (reg_names[REGNO (x)], file);
4131 else if (x == CONST0_RTX (GET_MODE (x)))
4133 else if (GET_CODE (x) == CONST_INT)
4134 output_addr_const (file, x);
4136 output_operand_lossage ("invalid %%r value");
4143 /* For conditional branches, returns or calls, substitute
4144 sptk, dptk, dpnt, or spnt for %s. */
4145 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4148 int pred_val = INTVAL (XEXP (x, 0));
4150 /* Guess top and bottom 10% statically predicted. */
4151 if (pred_val < REG_BR_PROB_BASE / 50)
4153 else if (pred_val < REG_BR_PROB_BASE / 2)
4155 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4160 else if (GET_CODE (current_output_insn) == CALL_INSN)
4165 fputs (which, file);
4170 x = current_insn_predicate;
4173 unsigned int regno = REGNO (XEXP (x, 0));
4174 if (GET_CODE (x) == EQ)
4176 fprintf (file, "(%s) ", reg_names [regno]);
4181 output_operand_lossage ("ia64_print_operand: unknown code");
4185 switch (GET_CODE (x))
4187 /* This happens for the spill/restore instructions. */
4192 /* ... fall through ... */
4195 fputs (reg_names [REGNO (x)], file);
4200 rtx addr = XEXP (x, 0);
4201 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4202 addr = XEXP (addr, 0);
4203 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4208 output_addr_const (file, x);
4215 /* Compute a (partial) cost for rtx X. Return true if the complete
4216 cost has been computed, and false if subexpressions should be
4217 scanned. In either case, *TOTAL contains the cost result. */
4218 /* ??? This is incomplete. */
4221 ia64_rtx_costs (x, code, outer_code, total)
4223 int code, outer_code;
4232 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4235 if (CONST_OK_FOR_I (INTVAL (x)))
4237 else if (CONST_OK_FOR_J (INTVAL (x)))
4240 *total = COSTS_N_INSNS (1);
4243 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4246 *total = COSTS_N_INSNS (1);
4251 *total = COSTS_N_INSNS (1);
4257 *total = COSTS_N_INSNS (3);
4261 /* For multiplies wider than HImode, we have to go to the FPU,
4262 which normally involves copies. Plus there's the latency
4263 of the multiply itself, and the latency of the instructions to
4264 transfer integer regs to FP regs. */
4265 /* ??? Check for FP mode. */
4266 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4267 *total = COSTS_N_INSNS (10);
4269 *total = COSTS_N_INSNS (2);
4277 *total = COSTS_N_INSNS (1);
4284 /* We make divide expensive, so that divide-by-constant will be
4285 optimized to a multiply. */
4286 *total = COSTS_N_INSNS (60);
4294 /* Calculate the cost of moving data from a register in class FROM to
4295 one in class TO, using MODE. */
4298 ia64_register_move_cost (mode, from, to)
4299 enum machine_mode mode;
4300 enum reg_class from, to;
4302 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4303 if (to == ADDL_REGS)
4305 if (from == ADDL_REGS)
4308 /* All costs are symmetric, so reduce cases by putting the
4309 lower number class as the destination. */
4312 enum reg_class tmp = to;
4313 to = from, from = tmp;
4316 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4317 so that we get secondary memory reloads. Between FR_REGS,
4318 we have to make this at least as expensive as MEMORY_MOVE_COST
4319 to avoid spectacularly poor register class preferencing. */
4322 if (to != GR_REGS || from != GR_REGS)
4323 return MEMORY_MOVE_COST (mode, to, 0);
4331 /* Moving between PR registers takes two insns. */
4332 if (from == PR_REGS)
4334 /* Moving between PR and anything but GR is impossible. */
4335 if (from != GR_REGS)
4336 return MEMORY_MOVE_COST (mode, to, 0);
4340 /* Moving between BR and anything but GR is impossible. */
4341 if (from != GR_REGS && from != GR_AND_BR_REGS)
4342 return MEMORY_MOVE_COST (mode, to, 0);
4347 /* Moving between AR and anything but GR is impossible. */
4348 if (from != GR_REGS)
4349 return MEMORY_MOVE_COST (mode, to, 0);
4354 case GR_AND_FR_REGS:
4355 case GR_AND_BR_REGS:
4366 /* This function returns the register class required for a secondary
4367 register when copying between one of the registers in CLASS, and X,
4368 using MODE. A return value of NO_REGS means that no secondary register
4372 ia64_secondary_reload_class (class, mode, x)
4373 enum reg_class class;
4374 enum machine_mode mode ATTRIBUTE_UNUSED;
4379 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4380 regno = true_regnum (x);
4387 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4388 interaction. We end up with two pseudos with overlapping lifetimes
4389 both of which are equiv to the same constant, and both which need
4390 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4391 changes depending on the path length, which means the qty_first_reg
4392 check in make_regs_eqv can give different answers at different times.
4393 At some point I'll probably need a reload_indi pattern to handle
4396 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4397 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4398 non-general registers for good measure. */
4399 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4402 /* This is needed if a pseudo used as a call_operand gets spilled to a
4404 if (GET_CODE (x) == MEM)
4409 /* Need to go through general registers to get to other class regs. */
4410 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4413 /* This can happen when a paradoxical subreg is an operand to the
4415 /* ??? This shouldn't be necessary after instruction scheduling is
4416 enabled, because paradoxical subregs are not accepted by
4417 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4418 stop the paradoxical subreg stupidity in the *_operand functions
4420 if (GET_CODE (x) == MEM
4421 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4422 || GET_MODE (x) == QImode))
4425 /* This can happen because of the ior/and/etc patterns that accept FP
4426 registers as operands. If the third operand is a constant, then it
4427 needs to be reloaded into a FP register. */
4428 if (GET_CODE (x) == CONST_INT)
4431 /* This can happen because of register elimination in a muldi3 insn.
4432 E.g. `26107 * (unsigned long)&u'. */
4433 if (GET_CODE (x) == PLUS)
4438 /* ??? This happens if we cse/gcse a BImode value across a call,
4439 and the function has a nonlocal goto. This is because global
4440 does not allocate call crossing pseudos to hard registers when
4441 current_function_has_nonlocal_goto is true. This is relatively
4442 common for C++ programs that use exceptions. To reproduce,
4443 return NO_REGS and compile libstdc++. */
4444 if (GET_CODE (x) == MEM)
4447 /* This can happen when we take a BImode subreg of a DImode value,
4448 and that DImode value winds up in some non-GR register. */
4449 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4454 /* Since we have no offsettable memory addresses, we need a temporary
4455 to hold the address of the second word. */
4468 /* Emit text to declare externally defined variables and functions, because
4469 the Intel assembler does not support undefined externals. */
4472 ia64_asm_output_external (file, decl, name)
4477 int save_referenced;
4479 /* GNU as does not need anything here, but the HP linker does need
4480 something for external functions. */
4484 || TREE_CODE (decl) != FUNCTION_DECL
4485 || strstr(name, "__builtin_") == name))
4488 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4489 the linker when we do this, so we need to be careful not to do this for
4490 builtin functions which have no library equivalent. Unfortunately, we
4491 can't tell here whether or not a function will actually be called by
4492 expand_expr, so we pull in library functions even if we may not need
4494 if (! strcmp (name, "__builtin_next_arg")
4495 || ! strcmp (name, "alloca")
4496 || ! strcmp (name, "__builtin_constant_p")
4497 || ! strcmp (name, "__builtin_args_info"))
4501 ia64_hpux_add_extern_decl (name);
4504 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4506 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4507 if (TREE_CODE (decl) == FUNCTION_DECL)
4508 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4509 (*targetm.asm_out.globalize_label) (file, name);
4510 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4514 /* Parse the -mfixed-range= option string. */
4517 fix_range (const_str)
4518 const char *const_str;
4521 char *str, *dash, *comma;
4523 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4524 REG2 are either register names or register numbers. The effect
4525 of this option is to mark the registers in the range from REG1 to
4526 REG2 as ``fixed'' so they won't be used by the compiler. This is
4527 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4529 i = strlen (const_str);
4530 str = (char *) alloca (i + 1);
4531 memcpy (str, const_str, i + 1);
4535 dash = strchr (str, '-');
4538 warning ("value of -mfixed-range must have form REG1-REG2");
4543 comma = strchr (dash + 1, ',');
4547 first = decode_reg_name (str);
4550 warning ("unknown register name: %s", str);
4554 last = decode_reg_name (dash + 1);
4557 warning ("unknown register name: %s", dash + 1);
4565 warning ("%s-%s is an empty range", str, dash + 1);
4569 for (i = first; i <= last; ++i)
4570 fixed_regs[i] = call_used_regs[i] = 1;
4580 static struct machine_function *
4581 ia64_init_machine_status ()
4583 return ggc_alloc_cleared (sizeof (struct machine_function));
4586 /* Handle TARGET_OPTIONS switches. */
4589 ia64_override_options ()
4593 const char *const name; /* processor name or nickname. */
4594 const enum processor_type processor;
4596 const processor_alias_table[] =
4598 {"itanium", PROCESSOR_ITANIUM},
4599 {"itanium1", PROCESSOR_ITANIUM},
4600 {"merced", PROCESSOR_ITANIUM},
4601 {"itanium2", PROCESSOR_ITANIUM2},
4602 {"mckinley", PROCESSOR_ITANIUM2},
4605 int const pta_size = ARRAY_SIZE (processor_alias_table);
4608 if (TARGET_AUTO_PIC)
4609 target_flags |= MASK_CONST_GP;
4611 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4613 warning ("cannot optimize floating point division for both latency and throughput");
4614 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4617 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4619 warning ("cannot optimize integer division for both latency and throughput");
4620 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4623 if (ia64_fixed_range_string)
4624 fix_range (ia64_fixed_range_string);
4626 if (ia64_tls_size_string)
4629 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4630 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4631 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4633 ia64_tls_size = tmp;
4636 if (!ia64_tune_string)
4637 ia64_tune_string = "itanium2";
4639 for (i = 0; i < pta_size; i++)
4640 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4642 ia64_tune = processor_alias_table[i].processor;
4647 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4649 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4650 flag_schedule_insns_after_reload = 0;
4652 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4654 init_machine_status = ia64_init_machine_status;
4656 /* Tell the compiler which flavor of TFmode we're using. */
4657 if (INTEL_EXTENDED_IEEE_FORMAT)
4658 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4661 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4662 static enum attr_type ia64_safe_type PARAMS((rtx));
4664 static enum attr_itanium_class
4665 ia64_safe_itanium_class (insn)
4668 if (recog_memoized (insn) >= 0)
4669 return get_attr_itanium_class (insn);
4671 return ITANIUM_CLASS_UNKNOWN;
4674 static enum attr_type
4675 ia64_safe_type (insn)
4678 if (recog_memoized (insn) >= 0)
4679 return get_attr_type (insn);
4681 return TYPE_UNKNOWN;
4684 /* The following collection of routines emit instruction group stop bits as
4685 necessary to avoid dependencies. */
4687 /* Need to track some additional registers as far as serialization is
4688 concerned so we can properly handle br.call and br.ret. We could
4689 make these registers visible to gcc, but since these registers are
4690 never explicitly used in gcc generated code, it seems wasteful to
4691 do so (plus it would make the call and return patterns needlessly
4693 #define REG_GP (GR_REG (1))
4694 #define REG_RP (BR_REG (0))
4695 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4696 /* This is used for volatile asms which may require a stop bit immediately
4697 before and after them. */
4698 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4699 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4700 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4702 /* For each register, we keep track of how it has been written in the
4703 current instruction group.
4705 If a register is written unconditionally (no qualifying predicate),
4706 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4708 If a register is written if its qualifying predicate P is true, we
4709 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4710 may be written again by the complement of P (P^1) and when this happens,
4711 WRITE_COUNT gets set to 2.
4713 The result of this is that whenever an insn attempts to write a register
4714 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4716 If a predicate register is written by a floating-point insn, we set
4717 WRITTEN_BY_FP to true.
4719 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4720 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4722 struct reg_write_state
4724 unsigned int write_count : 2;
4725 unsigned int first_pred : 16;
4726 unsigned int written_by_fp : 1;
4727 unsigned int written_by_and : 1;
4728 unsigned int written_by_or : 1;
4731 /* Cumulative info for the current instruction group. */
4732 struct reg_write_state rws_sum[NUM_REGS];
4733 /* Info for the current instruction. This gets copied to rws_sum after a
4734 stop bit is emitted. */
4735 struct reg_write_state rws_insn[NUM_REGS];
4737 /* Indicates whether this is the first instruction after a stop bit,
4738 in which case we don't need another stop bit. Without this, we hit
4739 the abort in ia64_variable_issue when scheduling an alloc. */
4740 static int first_instruction;
4742 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4743 RTL for one instruction. */
4746 unsigned int is_write : 1; /* Is register being written? */
4747 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4748 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4749 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4750 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4751 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4754 static void rws_update PARAMS ((struct reg_write_state *, int,
4755 struct reg_flags, int));
4756 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4757 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4758 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4759 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4760 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4761 static void init_insn_group_barriers PARAMS ((void));
4762 static int group_barrier_needed_p PARAMS ((rtx));
4763 static int safe_group_barrier_needed_p PARAMS ((rtx));
4765 /* Update *RWS for REGNO, which is being written by the current instruction,
4766 with predicate PRED, and associated register flags in FLAGS. */
4769 rws_update (rws, regno, flags, pred)
4770 struct reg_write_state *rws;
4772 struct reg_flags flags;
4776 rws[regno].write_count++;
4778 rws[regno].write_count = 2;
4779 rws[regno].written_by_fp |= flags.is_fp;
4780 /* ??? Not tracking and/or across differing predicates. */
4781 rws[regno].written_by_and = flags.is_and;
4782 rws[regno].written_by_or = flags.is_or;
4783 rws[regno].first_pred = pred;
4786 /* Handle an access to register REGNO of type FLAGS using predicate register
4787 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4788 a dependency with an earlier instruction in the same group. */
4791 rws_access_regno (regno, flags, pred)
4793 struct reg_flags flags;
4796 int need_barrier = 0;
4798 if (regno >= NUM_REGS)
4801 if (! PR_REGNO_P (regno))
4802 flags.is_and = flags.is_or = 0;
4808 /* One insn writes same reg multiple times? */
4809 if (rws_insn[regno].write_count > 0)
4812 /* Update info for current instruction. */
4813 rws_update (rws_insn, regno, flags, pred);
4814 write_count = rws_sum[regno].write_count;
4816 switch (write_count)
4819 /* The register has not been written yet. */
4820 rws_update (rws_sum, regno, flags, pred);
4824 /* The register has been written via a predicate. If this is
4825 not a complementary predicate, then we need a barrier. */
4826 /* ??? This assumes that P and P+1 are always complementary
4827 predicates for P even. */
4828 if (flags.is_and && rws_sum[regno].written_by_and)
4830 else if (flags.is_or && rws_sum[regno].written_by_or)
4832 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4834 rws_update (rws_sum, regno, flags, pred);
4838 /* The register has been unconditionally written already. We
4840 if (flags.is_and && rws_sum[regno].written_by_and)
4842 else if (flags.is_or && rws_sum[regno].written_by_or)
4846 rws_sum[regno].written_by_and = flags.is_and;
4847 rws_sum[regno].written_by_or = flags.is_or;
4856 if (flags.is_branch)
4858 /* Branches have several RAW exceptions that allow to avoid
4861 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4862 /* RAW dependencies on branch regs are permissible as long
4863 as the writer is a non-branch instruction. Since we
4864 never generate code that uses a branch register written
4865 by a branch instruction, handling this case is
4869 if (REGNO_REG_CLASS (regno) == PR_REGS
4870 && ! rws_sum[regno].written_by_fp)
4871 /* The predicates of a branch are available within the
4872 same insn group as long as the predicate was written by
4873 something other than a floating-point instruction. */
4877 if (flags.is_and && rws_sum[regno].written_by_and)
4879 if (flags.is_or && rws_sum[regno].written_by_or)
4882 switch (rws_sum[regno].write_count)
4885 /* The register has not been written yet. */
4889 /* The register has been written via a predicate. If this is
4890 not a complementary predicate, then we need a barrier. */
4891 /* ??? This assumes that P and P+1 are always complementary
4892 predicates for P even. */
4893 if ((rws_sum[regno].first_pred ^ 1) != pred)
4898 /* The register has been unconditionally written already. We
4908 return need_barrier;
4912 rws_access_reg (reg, flags, pred)
4914 struct reg_flags flags;
4917 int regno = REGNO (reg);
4918 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4921 return rws_access_regno (regno, flags, pred);
4924 int need_barrier = 0;
4926 need_barrier |= rws_access_regno (regno + n, flags, pred);
4927 return need_barrier;
4931 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4932 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4935 update_set_flags (x, pflags, ppred, pcond)
4937 struct reg_flags *pflags;
4941 rtx src = SET_SRC (x);
4945 switch (GET_CODE (src))
4951 if (SET_DEST (x) == pc_rtx)
4952 /* X is a conditional branch. */
4956 int is_complemented = 0;
4958 /* X is a conditional move. */
4959 rtx cond = XEXP (src, 0);
4960 if (GET_CODE (cond) == EQ)
4961 is_complemented = 1;
4962 cond = XEXP (cond, 0);
4963 if (GET_CODE (cond) != REG
4964 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4967 if (XEXP (src, 1) == SET_DEST (x)
4968 || XEXP (src, 2) == SET_DEST (x))
4970 /* X is a conditional move that conditionally writes the
4973 /* We need another complement in this case. */
4974 if (XEXP (src, 1) == SET_DEST (x))
4975 is_complemented = ! is_complemented;
4977 *ppred = REGNO (cond);
4978 if (is_complemented)
4982 /* ??? If this is a conditional write to the dest, then this
4983 instruction does not actually read one source. This probably
4984 doesn't matter, because that source is also the dest. */
4985 /* ??? Multiple writes to predicate registers are allowed
4986 if they are all AND type compares, or if they are all OR
4987 type compares. We do not generate such instructions
4990 /* ... fall through ... */
4993 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4994 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4995 /* Set pflags->is_fp to 1 so that we know we're dealing
4996 with a floating point comparison when processing the
4997 destination of the SET. */
5000 /* Discover if this is a parallel comparison. We only handle
5001 and.orcm and or.andcm at present, since we must retain a
5002 strict inverse on the predicate pair. */
5003 else if (GET_CODE (src) == AND)
5005 else if (GET_CODE (src) == IOR)
5012 /* Subroutine of rtx_needs_barrier; this function determines whether the
5013 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5014 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5018 set_src_needs_barrier (x, flags, pred, cond)
5020 struct reg_flags flags;
5024 int need_barrier = 0;
5026 rtx src = SET_SRC (x);
5028 if (GET_CODE (src) == CALL)
5029 /* We don't need to worry about the result registers that
5030 get written by subroutine call. */
5031 return rtx_needs_barrier (src, flags, pred);
5032 else if (SET_DEST (x) == pc_rtx)
5034 /* X is a conditional branch. */
5035 /* ??? This seems redundant, as the caller sets this bit for
5037 flags.is_branch = 1;
5038 return rtx_needs_barrier (src, flags, pred);
5041 need_barrier = rtx_needs_barrier (src, flags, pred);
5043 /* This instruction unconditionally uses a predicate register. */
5045 need_barrier |= rws_access_reg (cond, flags, 0);
5048 if (GET_CODE (dst) == ZERO_EXTRACT)
5050 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5051 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5052 dst = XEXP (dst, 0);
5054 return need_barrier;
5057 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
5058 Return 1 is this access creates a dependency with an earlier instruction
5059 in the same group. */
5062 rtx_needs_barrier (x, flags, pred)
5064 struct reg_flags flags;
5068 int is_complemented = 0;
5069 int need_barrier = 0;
5070 const char *format_ptr;
5071 struct reg_flags new_flags;
5079 switch (GET_CODE (x))
5082 update_set_flags (x, &new_flags, &pred, &cond);
5083 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5084 if (GET_CODE (SET_SRC (x)) != CALL)
5086 new_flags.is_write = 1;
5087 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5092 new_flags.is_write = 0;
5093 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5095 /* Avoid multiple register writes, in case this is a pattern with
5096 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5097 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5099 new_flags.is_write = 1;
5100 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5101 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5102 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5107 /* X is a predicated instruction. */
5109 cond = COND_EXEC_TEST (x);
5112 need_barrier = rtx_needs_barrier (cond, flags, 0);
5114 if (GET_CODE (cond) == EQ)
5115 is_complemented = 1;
5116 cond = XEXP (cond, 0);
5117 if (GET_CODE (cond) != REG
5118 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5120 pred = REGNO (cond);
5121 if (is_complemented)
5124 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5125 return need_barrier;
5129 /* Clobber & use are for earlier compiler-phases only. */
5134 /* We always emit stop bits for traditional asms. We emit stop bits
5135 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5136 if (GET_CODE (x) != ASM_OPERANDS
5137 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5139 /* Avoid writing the register multiple times if we have multiple
5140 asm outputs. This avoids an abort in rws_access_reg. */
5141 if (! rws_insn[REG_VOLATILE].write_count)
5143 new_flags.is_write = 1;
5144 rws_access_regno (REG_VOLATILE, new_flags, pred);
5149 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5150 We can not just fall through here since then we would be confused
5151 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5152 traditional asms unlike their normal usage. */
5154 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5155 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5160 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5162 rtx pat = XVECEXP (x, 0, i);
5163 if (GET_CODE (pat) == SET)
5165 update_set_flags (pat, &new_flags, &pred, &cond);
5166 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5168 else if (GET_CODE (pat) == USE
5169 || GET_CODE (pat) == CALL
5170 || GET_CODE (pat) == ASM_OPERANDS)
5171 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5172 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5175 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5177 rtx pat = XVECEXP (x, 0, i);
5178 if (GET_CODE (pat) == SET)
5180 if (GET_CODE (SET_SRC (pat)) != CALL)
5182 new_flags.is_write = 1;
5183 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5187 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5188 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5196 if (REGNO (x) == AR_UNAT_REGNUM)
5198 for (i = 0; i < 64; ++i)
5199 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5202 need_barrier = rws_access_reg (x, flags, pred);
5206 /* Find the regs used in memory address computation. */
5207 new_flags.is_write = 0;
5208 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5211 case CONST_INT: case CONST_DOUBLE:
5212 case SYMBOL_REF: case LABEL_REF: case CONST:
5215 /* Operators with side-effects. */
5216 case POST_INC: case POST_DEC:
5217 if (GET_CODE (XEXP (x, 0)) != REG)
5220 new_flags.is_write = 0;
5221 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5222 new_flags.is_write = 1;
5223 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5227 if (GET_CODE (XEXP (x, 0)) != REG)
5230 new_flags.is_write = 0;
5231 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5232 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5233 new_flags.is_write = 1;
5234 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5237 /* Handle common unary and binary ops for efficiency. */
5238 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5239 case MOD: case UDIV: case UMOD: case AND: case IOR:
5240 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5241 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5242 case NE: case EQ: case GE: case GT: case LE:
5243 case LT: case GEU: case GTU: case LEU: case LTU:
5244 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5245 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5248 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5249 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5250 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5251 case SQRT: case FFS: case POPCOUNT:
5252 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5256 switch (XINT (x, 1))
5258 case UNSPEC_LTOFF_DTPMOD:
5259 case UNSPEC_LTOFF_DTPREL:
5261 case UNSPEC_LTOFF_TPREL:
5263 case UNSPEC_PRED_REL_MUTEX:
5264 case UNSPEC_PIC_CALL:
5266 case UNSPEC_FETCHADD_ACQ:
5267 case UNSPEC_BSP_VALUE:
5268 case UNSPEC_FLUSHRS:
5269 case UNSPEC_BUNDLE_SELECTOR:
5272 case UNSPEC_GR_SPILL:
5273 case UNSPEC_GR_RESTORE:
5275 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5276 HOST_WIDE_INT bit = (offset >> 3) & 63;
5278 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5279 new_flags.is_write = (XINT (x, 1) == 1);
5280 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5285 case UNSPEC_FR_SPILL:
5286 case UNSPEC_FR_RESTORE:
5287 case UNSPEC_GETF_EXP:
5289 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5292 case UNSPEC_FR_RECIP_APPROX:
5293 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5294 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5297 case UNSPEC_CMPXCHG_ACQ:
5298 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5299 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5307 case UNSPEC_VOLATILE:
5308 switch (XINT (x, 1))
5311 /* Alloc must always be the first instruction of a group.
5312 We force this by always returning true. */
5313 /* ??? We might get better scheduling if we explicitly check for
5314 input/local/output register dependencies, and modify the
5315 scheduler so that alloc is always reordered to the start of
5316 the current group. We could then eliminate all of the
5317 first_instruction code. */
5318 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5320 new_flags.is_write = 1;
5321 rws_access_regno (REG_AR_CFM, new_flags, pred);
5324 case UNSPECV_SET_BSP:
5328 case UNSPECV_BLOCKAGE:
5329 case UNSPECV_INSN_GROUP_BARRIER:
5331 case UNSPECV_PSAC_ALL:
5332 case UNSPECV_PSAC_NORMAL:
5341 new_flags.is_write = 0;
5342 need_barrier = rws_access_regno (REG_RP, flags, pred);
5343 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5345 new_flags.is_write = 1;
5346 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5347 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5351 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5352 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5353 switch (format_ptr[i])
5355 case '0': /* unused field */
5356 case 'i': /* integer */
5357 case 'n': /* note */
5358 case 'w': /* wide integer */
5359 case 's': /* pointer to string */
5360 case 'S': /* optional pointer to string */
5364 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5369 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5370 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5379 return need_barrier;
5382 /* Clear out the state for group_barrier_needed_p at the start of a
5383 sequence of insns. */
5386 init_insn_group_barriers ()
5388 memset (rws_sum, 0, sizeof (rws_sum));
5389 first_instruction = 1;
5392 /* Given the current state, recorded by previous calls to this function,
5393 determine whether a group barrier (a stop bit) is necessary before INSN.
5394 Return nonzero if so. */
5397 group_barrier_needed_p (insn)
5401 int need_barrier = 0;
5402 struct reg_flags flags;
5404 memset (&flags, 0, sizeof (flags));
5405 switch (GET_CODE (insn))
5411 /* A barrier doesn't imply an instruction group boundary. */
5415 memset (rws_insn, 0, sizeof (rws_insn));
5419 flags.is_branch = 1;
5420 flags.is_sibcall = SIBLING_CALL_P (insn);
5421 memset (rws_insn, 0, sizeof (rws_insn));
5423 /* Don't bundle a call following another call. */
5424 if ((pat = prev_active_insn (insn))
5425 && GET_CODE (pat) == CALL_INSN)
5431 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5435 flags.is_branch = 1;
5437 /* Don't bundle a jump following a call. */
5438 if ((pat = prev_active_insn (insn))
5439 && GET_CODE (pat) == CALL_INSN)
5447 if (GET_CODE (PATTERN (insn)) == USE
5448 || GET_CODE (PATTERN (insn)) == CLOBBER)
5449 /* Don't care about USE and CLOBBER "insns"---those are used to
5450 indicate to the optimizer that it shouldn't get rid of
5451 certain operations. */
5454 pat = PATTERN (insn);
5456 /* Ug. Hack hacks hacked elsewhere. */
5457 switch (recog_memoized (insn))
5459 /* We play dependency tricks with the epilogue in order
5460 to get proper schedules. Undo this for dv analysis. */
5461 case CODE_FOR_epilogue_deallocate_stack:
5462 case CODE_FOR_prologue_allocate_stack:
5463 pat = XVECEXP (pat, 0, 0);
5466 /* The pattern we use for br.cloop confuses the code above.
5467 The second element of the vector is representative. */
5468 case CODE_FOR_doloop_end_internal:
5469 pat = XVECEXP (pat, 0, 1);
5472 /* Doesn't generate code. */
5473 case CODE_FOR_pred_rel_mutex:
5474 case CODE_FOR_prologue_use:
5481 memset (rws_insn, 0, sizeof (rws_insn));
5482 need_barrier = rtx_needs_barrier (pat, flags, 0);
5484 /* Check to see if the previous instruction was a volatile
5487 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5494 if (first_instruction && INSN_P (insn)
5495 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5496 && GET_CODE (PATTERN (insn)) != USE
5497 && GET_CODE (PATTERN (insn)) != CLOBBER)
5500 first_instruction = 0;
5503 return need_barrier;
5506 /* Like group_barrier_needed_p, but do not clobber the current state. */
5509 safe_group_barrier_needed_p (insn)
5512 struct reg_write_state rws_saved[NUM_REGS];
5513 int saved_first_instruction;
5516 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5517 saved_first_instruction = first_instruction;
5519 t = group_barrier_needed_p (insn);
5521 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5522 first_instruction = saved_first_instruction;
5527 /* Scan the current function and insert stop bits as necessary to
5528 eliminate dependencies. This function assumes that a final
5529 instruction scheduling pass has been run which has already
5530 inserted most of the necessary stop bits. This function only
5531 inserts new ones at basic block boundaries, since these are
5532 invisible to the scheduler. */
5535 emit_insn_group_barriers (dump)
5540 int insns_since_last_label = 0;
5542 init_insn_group_barriers ();
5544 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5546 if (GET_CODE (insn) == CODE_LABEL)
5548 if (insns_since_last_label)
5550 insns_since_last_label = 0;
5552 else if (GET_CODE (insn) == NOTE
5553 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5555 if (insns_since_last_label)
5557 insns_since_last_label = 0;
5559 else if (GET_CODE (insn) == INSN
5560 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5561 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5563 init_insn_group_barriers ();
5566 else if (INSN_P (insn))
5568 insns_since_last_label = 1;
5570 if (group_barrier_needed_p (insn))
5575 fprintf (dump, "Emitting stop before label %d\n",
5576 INSN_UID (last_label));
5577 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5580 init_insn_group_barriers ();
5588 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5589 This function has to emit all necessary group barriers. */
5592 emit_all_insn_group_barriers (dump)
5593 FILE *dump ATTRIBUTE_UNUSED;
5597 init_insn_group_barriers ();
5599 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5601 if (GET_CODE (insn) == BARRIER)
5603 rtx last = prev_active_insn (insn);
5607 if (GET_CODE (last) == JUMP_INSN
5608 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5609 last = prev_active_insn (last);
5610 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5611 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5613 init_insn_group_barriers ();
5615 else if (INSN_P (insn))
5617 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5618 init_insn_group_barriers ();
5619 else if (group_barrier_needed_p (insn))
5621 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5622 init_insn_group_barriers ();
5623 group_barrier_needed_p (insn);
5630 static int errata_find_address_regs PARAMS ((rtx *, void *));
5631 static void errata_emit_nops PARAMS ((rtx));
5632 static void fixup_errata PARAMS ((void));
5634 /* This structure is used to track some details about the previous insns
5635 groups so we can determine if it may be necessary to insert NOPs to
5636 workaround hardware errata. */
5639 HARD_REG_SET p_reg_set;
5640 HARD_REG_SET gr_reg_conditionally_set;
5643 /* Index into the last_group array. */
5644 static int group_idx;
5646 /* Called through for_each_rtx; determines if a hard register that was
5647 conditionally set in the previous group is used as an address register.
5648 It ensures that for_each_rtx returns 1 in that case. */
5650 errata_find_address_regs (xp, data)
5652 void *data ATTRIBUTE_UNUSED;
5655 if (GET_CODE (x) != MEM)
5658 if (GET_CODE (x) == POST_MODIFY)
5660 if (GET_CODE (x) == REG)
5662 struct group *prev_group = last_group + (group_idx ^ 1);
5663 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5671 /* Called for each insn; this function keeps track of the state in
5672 last_group and emits additional NOPs if necessary to work around
5673 an Itanium A/B step erratum. */
5675 errata_emit_nops (insn)
5678 struct group *this_group = last_group + group_idx;
5679 struct group *prev_group = last_group + (group_idx ^ 1);
5680 rtx pat = PATTERN (insn);
5681 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5682 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5683 enum attr_type type;
5686 if (GET_CODE (real_pat) == USE
5687 || GET_CODE (real_pat) == CLOBBER
5688 || GET_CODE (real_pat) == ASM_INPUT
5689 || GET_CODE (real_pat) == ADDR_VEC
5690 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5691 || asm_noperands (PATTERN (insn)) >= 0)
5694 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5697 if (GET_CODE (set) == PARALLEL)
5700 set = XVECEXP (real_pat, 0, 0);
5701 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5702 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5703 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5710 if (set && GET_CODE (set) != SET)
5713 type = get_attr_type (insn);
5716 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5717 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5719 if ((type == TYPE_M || type == TYPE_A) && cond && set
5720 && REG_P (SET_DEST (set))
5721 && GET_CODE (SET_SRC (set)) != PLUS
5722 && GET_CODE (SET_SRC (set)) != MINUS
5723 && (GET_CODE (SET_SRC (set)) != ASHIFT
5724 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5725 && (GET_CODE (SET_SRC (set)) != MEM
5726 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5727 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5729 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5730 || ! REG_P (XEXP (cond, 0)))
5733 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5734 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5736 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5738 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5739 emit_insn_before (gen_nop (), insn);
5740 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5742 memset (last_group, 0, sizeof last_group);
5746 /* Emit extra nops if they are required to work around hardware errata. */
5753 if (! TARGET_B_STEP)
5757 memset (last_group, 0, sizeof last_group);
5759 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5764 if (ia64_safe_type (insn) == TYPE_S)
5767 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5770 errata_emit_nops (insn);
5775 /* Instruction scheduling support. */
5777 #define NR_BUNDLES 10
5779 /* A list of names of all available bundles. */
5781 static const char *bundle_name [NR_BUNDLES] =
5787 #if NR_BUNDLES == 10
5797 /* Nonzero if we should insert stop bits into the schedule. */
5799 int ia64_final_schedule = 0;
5801 /* Codes of the corresponding quieryied units: */
5803 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5804 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5806 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5807 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5809 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5811 /* The following variable value is an insn group barrier. */
5813 static rtx dfa_stop_insn;
5815 /* The following variable value is the last issued insn. */
5817 static rtx last_scheduled_insn;
5819 /* The following variable value is size of the DFA state. */
5821 static size_t dfa_state_size;
5823 /* The following variable value is pointer to a DFA state used as
5824 temporary variable. */
5826 static state_t temp_dfa_state = NULL;
5828 /* The following variable value is DFA state after issuing the last
5831 static state_t prev_cycle_state = NULL;
5833 /* The following array element values are TRUE if the corresponding
5834 insn requires to add stop bits before it. */
5836 static char *stops_p;
5838 /* The following variable is used to set up the mentioned above array. */
5840 static int stop_before_p = 0;
5842 /* The following variable value is length of the arrays `clocks' and
5845 static int clocks_length;
5847 /* The following array element values are cycles on which the
5848 corresponding insn will be issued. The array is used only for
5853 /* The following array element values are numbers of cycles should be
5854 added to improve insn scheduling for MM_insns for Itanium1. */
5856 static int *add_cycles;
5858 static rtx ia64_single_set PARAMS ((rtx));
5859 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5861 /* Map a bundle number to its pseudo-op. */
5867 return bundle_name[b];
5871 /* Return the maximum number of instructions a cpu can issue. */
5879 /* Helper function - like single_set, but look inside COND_EXEC. */
5882 ia64_single_set (insn)
5885 rtx x = PATTERN (insn), ret;
5886 if (GET_CODE (x) == COND_EXEC)
5887 x = COND_EXEC_CODE (x);
5888 if (GET_CODE (x) == SET)
5891 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5892 Although they are not classical single set, the second set is there just
5893 to protect it from moving past FP-relative stack accesses. */
5894 switch (recog_memoized (insn))
5896 case CODE_FOR_prologue_allocate_stack:
5897 case CODE_FOR_epilogue_deallocate_stack:
5898 ret = XVECEXP (x, 0, 0);
5902 ret = single_set_2 (insn, x);
5909 /* Adjust the cost of a scheduling dependency. Return the new cost of
5910 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5913 ia64_adjust_cost (insn, link, dep_insn, cost)
5914 rtx insn, link, dep_insn;
5917 enum attr_itanium_class dep_class;
5918 enum attr_itanium_class insn_class;
5920 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5923 insn_class = ia64_safe_itanium_class (insn);
5924 dep_class = ia64_safe_itanium_class (dep_insn);
5925 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5926 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5932 /* Like emit_insn_before, but skip cycle_display notes.
5933 ??? When cycle display notes are implemented, update this. */
5936 ia64_emit_insn_before (insn, before)
5939 emit_insn_before (insn, before);
5942 /* The following function marks insns who produce addresses for load
5943 and store insns. Such insns will be placed into M slots because it
5944 decrease latency time for Itanium1 (see function
5945 `ia64_produce_address_p' and the DFA descriptions). */
5948 ia64_dependencies_evaluation_hook (head, tail)
5951 rtx insn, link, next, next_tail;
5953 next_tail = NEXT_INSN (tail);
5954 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5957 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5959 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5961 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5963 next = XEXP (link, 0);
5964 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5965 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5966 && ia64_st_address_bypass_p (insn, next))
5968 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5969 || ia64_safe_itanium_class (next)
5970 == ITANIUM_CLASS_FLD)
5971 && ia64_ld_address_bypass_p (insn, next))
5974 insn->call = link != 0;
5978 /* We're beginning a new block. Initialize data structures as necessary. */
5981 ia64_sched_init (dump, sched_verbose, max_ready)
5982 FILE *dump ATTRIBUTE_UNUSED;
5983 int sched_verbose ATTRIBUTE_UNUSED;
5984 int max_ready ATTRIBUTE_UNUSED;
5986 #ifdef ENABLE_CHECKING
5989 if (reload_completed)
5990 for (insn = NEXT_INSN (current_sched_info->prev_head);
5991 insn != current_sched_info->next_tail;
5992 insn = NEXT_INSN (insn))
5993 if (SCHED_GROUP_P (insn))
5996 last_scheduled_insn = NULL_RTX;
5997 init_insn_group_barriers ();
6000 /* We are about to being issuing insns for this clock cycle.
6001 Override the default sort algorithm to better slot instructions. */
6004 ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6005 clock_var, reorder_type)
6010 int clock_var ATTRIBUTE_UNUSED;
6014 int n_ready = *pn_ready;
6015 rtx *e_ready = ready + n_ready;
6019 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6021 if (reorder_type == 0)
6023 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6025 for (insnp = ready; insnp < e_ready; insnp++)
6026 if (insnp < e_ready)
6029 enum attr_type t = ia64_safe_type (insn);
6030 if (t == TYPE_UNKNOWN)
6032 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6033 || asm_noperands (PATTERN (insn)) >= 0)
6035 rtx lowest = ready[n_asms];
6036 ready[n_asms] = insn;
6042 rtx highest = ready[n_ready - 1];
6043 ready[n_ready - 1] = insn;
6050 if (n_asms < n_ready)
6052 /* Some normal insns to process. Skip the asms. */
6056 else if (n_ready > 0)
6060 if (ia64_final_schedule)
6063 int nr_need_stop = 0;
6065 for (insnp = ready; insnp < e_ready; insnp++)
6066 if (safe_group_barrier_needed_p (*insnp))
6069 if (reorder_type == 1 && n_ready == nr_need_stop)
6071 if (reorder_type == 0)
6074 /* Move down everything that needs a stop bit, preserving
6076 while (insnp-- > ready + deleted)
6077 while (insnp >= ready + deleted)
6080 if (! safe_group_barrier_needed_p (insn))
6082 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6093 /* We are about to being issuing insns for this clock cycle. Override
6094 the default sort algorithm to better slot instructions. */
6097 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6104 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6105 pn_ready, clock_var, 0);
6108 /* Like ia64_sched_reorder, but called after issuing each insn.
6109 Override the default sort algorithm to better slot instructions. */
6112 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6113 FILE *dump ATTRIBUTE_UNUSED;
6114 int sched_verbose ATTRIBUTE_UNUSED;
6119 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6120 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6121 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6125 /* We are about to issue INSN. Return the number of insns left on the
6126 ready queue that can be issued this cycle. */
6129 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6130 FILE *dump ATTRIBUTE_UNUSED;
6131 int sched_verbose ATTRIBUTE_UNUSED;
6132 rtx insn ATTRIBUTE_UNUSED;
6133 int can_issue_more ATTRIBUTE_UNUSED;
6135 last_scheduled_insn = insn;
6136 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6137 if (reload_completed)
6139 if (group_barrier_needed_p (insn))
6141 if (GET_CODE (insn) == CALL_INSN)
6142 init_insn_group_barriers ();
6143 stops_p [INSN_UID (insn)] = stop_before_p;
6149 /* We are choosing insn from the ready queue. Return nonzero if INSN
6153 ia64_first_cycle_multipass_dfa_lookahead_guard (insn)
6156 if (insn == NULL_RTX || !INSN_P (insn))
6158 return (!reload_completed
6159 || !safe_group_barrier_needed_p (insn));
6162 /* The following variable value is pseudo-insn used by the DFA insn
6163 scheduler to change the DFA state when the simulated clock is
6166 static rtx dfa_pre_cycle_insn;
6168 /* We are about to being issuing INSN. Return nonzero if we can not
6169 issue it on given cycle CLOCK and return zero if we should not sort
6170 the ready queue on the next clock start. */
6173 ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p)
6177 int last_clock, clock;
6180 int setup_clocks_p = FALSE;
6182 if (insn == NULL_RTX || !INSN_P (insn))
6184 if ((reload_completed && safe_group_barrier_needed_p (insn))
6185 || (last_scheduled_insn
6186 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6187 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6188 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6190 init_insn_group_barriers ();
6191 if (verbose && dump)
6192 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6193 last_clock == clock ? " + cycle advance" : "");
6195 if (last_clock == clock)
6197 state_transition (curr_state, dfa_stop_insn);
6198 if (TARGET_EARLY_STOP_BITS)
6199 *sort_p = (last_scheduled_insn == NULL_RTX
6200 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6205 else if (reload_completed)
6206 setup_clocks_p = TRUE;
6207 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6208 state_transition (curr_state, dfa_stop_insn);
6209 state_transition (curr_state, dfa_pre_cycle_insn);
6210 state_transition (curr_state, NULL);
6212 else if (reload_completed)
6213 setup_clocks_p = TRUE;
6214 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6216 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6218 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6223 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6224 if (REG_NOTE_KIND (link) == 0)
6226 enum attr_itanium_class dep_class;
6227 rtx dep_insn = XEXP (link, 0);
6229 dep_class = ia64_safe_itanium_class (dep_insn);
6230 if ((dep_class == ITANIUM_CLASS_MMMUL
6231 || dep_class == ITANIUM_CLASS_MMSHF)
6232 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6234 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6235 d = last_clock - clocks [INSN_UID (dep_insn)];
6238 add_cycles [INSN_UID (insn)] = 3 - d;
6246 /* The following page contains abstract data `bundle states' which are
6247 used for bundling insns (inserting nops and template generation). */
6249 /* The following describes state of insn bundling. */
6253 /* Unique bundle state number to identify them in the debugging
6256 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6257 /* number nops before and after the insn */
6258 short before_nops_num, after_nops_num;
6259 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6261 int cost; /* cost of the state in cycles */
6262 int accumulated_insns_num; /* number of all previous insns including
6263 nops. L is considered as 2 insns */
6264 int branch_deviation; /* deviation of previous branches from 3rd slots */
6265 struct bundle_state *next; /* next state with the same insn_num */
6266 struct bundle_state *originator; /* originator (previous insn state) */
6267 /* All bundle states are in the following chain. */
6268 struct bundle_state *allocated_states_chain;
6269 /* The DFA State after issuing the insn and the nops. */
6273 /* The following is map insn number to the corresponding bundle state. */
6275 static struct bundle_state **index_to_bundle_states;
6277 /* The unique number of next bundle state. */
6279 static int bundle_states_num;
6281 /* All allocated bundle states are in the following chain. */
6283 static struct bundle_state *allocated_bundle_states_chain;
6285 /* All allocated but not used bundle states are in the following
6288 static struct bundle_state *free_bundle_state_chain;
6291 /* The following function returns a free bundle state. */
6293 static struct bundle_state *
6294 get_free_bundle_state ()
6296 struct bundle_state *result;
6298 if (free_bundle_state_chain != NULL)
6300 result = free_bundle_state_chain;
6301 free_bundle_state_chain = result->next;
6305 result = xmalloc (sizeof (struct bundle_state));
6306 result->dfa_state = xmalloc (dfa_state_size);
6307 result->allocated_states_chain = allocated_bundle_states_chain;
6308 allocated_bundle_states_chain = result;
6310 result->unique_num = bundle_states_num++;
6315 /* The following function frees given bundle state. */
6318 free_bundle_state (state)
6319 struct bundle_state *state;
6321 state->next = free_bundle_state_chain;
6322 free_bundle_state_chain = state;
6325 /* Start work with abstract data `bundle states'. */
6328 initiate_bundle_states ()
6330 bundle_states_num = 0;
6331 free_bundle_state_chain = NULL;
6332 allocated_bundle_states_chain = NULL;
6335 /* Finish work with abstract data `bundle states'. */
6338 finish_bundle_states ()
6340 struct bundle_state *curr_state, *next_state;
6342 for (curr_state = allocated_bundle_states_chain;
6344 curr_state = next_state)
6346 next_state = curr_state->allocated_states_chain;
6347 free (curr_state->dfa_state);
6352 /* Hash table of the bundle states. The key is dfa_state and insn_num
6353 of the bundle states. */
6355 static htab_t bundle_state_table;
6357 /* The function returns hash of BUNDLE_STATE. */
6360 bundle_state_hash (bundle_state)
6361 const void *bundle_state;
6363 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6366 for (result = i = 0; i < dfa_state_size; i++)
6367 result += (((unsigned char *) state->dfa_state) [i]
6368 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6369 return result + state->insn_num;
6372 /* The function returns nonzero if the bundle state keys are equal. */
6375 bundle_state_eq_p (bundle_state_1, bundle_state_2)
6376 const void *bundle_state_1;
6377 const void *bundle_state_2;
6379 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6380 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6382 return (state1->insn_num == state2->insn_num
6383 && memcmp (state1->dfa_state, state2->dfa_state,
6384 dfa_state_size) == 0);
6387 /* The function inserts the BUNDLE_STATE into the hash table. The
6388 function returns nonzero if the bundle has been inserted into the
6389 table. The table contains the best bundle state with given key. */
6392 insert_bundle_state (bundle_state)
6393 struct bundle_state *bundle_state;
6397 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6398 if (*entry_ptr == NULL)
6400 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6401 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6402 *entry_ptr = (void *) bundle_state;
6405 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6406 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6407 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6408 > bundle_state->accumulated_insns_num
6409 || (((struct bundle_state *)
6410 *entry_ptr)->accumulated_insns_num
6411 == bundle_state->accumulated_insns_num
6412 && ((struct bundle_state *)
6413 *entry_ptr)->branch_deviation
6414 > bundle_state->branch_deviation))))
6417 struct bundle_state temp;
6419 temp = *(struct bundle_state *) *entry_ptr;
6420 *(struct bundle_state *) *entry_ptr = *bundle_state;
6421 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6422 *bundle_state = temp;
6427 /* Start work with the hash table. */
6430 initiate_bundle_state_table ()
6432 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6436 /* Finish work with the hash table. */
6439 finish_bundle_state_table ()
6441 htab_delete (bundle_state_table);
6446 /* The following variable is a insn `nop' used to check bundle states
6447 with different number of inserted nops. */
6449 static rtx ia64_nop;
6451 /* The following function tries to issue NOPS_NUM nops for the current
6452 state without advancing processor cycle. If it failed, the
6453 function returns FALSE and frees the current state. */
6456 try_issue_nops (curr_state, nops_num)
6457 struct bundle_state *curr_state;
6462 for (i = 0; i < nops_num; i++)
6463 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6465 free_bundle_state (curr_state);
6471 /* The following function tries to issue INSN for the current
6472 state without advancing processor cycle. If it failed, the
6473 function returns FALSE and frees the current state. */
6476 try_issue_insn (curr_state, insn)
6477 struct bundle_state *curr_state;
6480 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6482 free_bundle_state (curr_state);
6488 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6489 starting with ORIGINATOR without advancing processor cycle. If
6490 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6491 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
6492 If it was successful, the function creates new bundle state and
6493 insert into the hash table and into `index_to_bundle_states'. */
6496 issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p,
6498 struct bundle_state *originator;
6499 int before_nops_num;
6501 int try_bundle_end_p, only_bundle_end_p;
6503 struct bundle_state *curr_state;
6505 curr_state = get_free_bundle_state ();
6506 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6507 curr_state->insn = insn;
6508 curr_state->insn_num = originator->insn_num + 1;
6509 curr_state->cost = originator->cost;
6510 curr_state->originator = originator;
6511 curr_state->before_nops_num = before_nops_num;
6512 curr_state->after_nops_num = 0;
6513 curr_state->accumulated_insns_num
6514 = originator->accumulated_insns_num + before_nops_num;
6515 curr_state->branch_deviation = originator->branch_deviation;
6516 if (insn == NULL_RTX)
6518 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6520 if (GET_MODE (insn) == TImode)
6522 if (!try_issue_nops (curr_state, before_nops_num))
6524 if (!try_issue_insn (curr_state, insn))
6526 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6527 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6528 && curr_state->accumulated_insns_num % 3 != 0)
6530 free_bundle_state (curr_state);
6534 else if (GET_MODE (insn) != TImode)
6536 if (!try_issue_nops (curr_state, before_nops_num))
6538 if (!try_issue_insn (curr_state, insn))
6540 curr_state->accumulated_insns_num++;
6541 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6542 || asm_noperands (PATTERN (insn)) >= 0)
6544 if (ia64_safe_type (insn) == TYPE_L)
6545 curr_state->accumulated_insns_num++;
6549 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6550 state_transition (curr_state->dfa_state, NULL);
6552 if (!try_issue_nops (curr_state, before_nops_num))
6554 if (!try_issue_insn (curr_state, insn))
6556 curr_state->accumulated_insns_num++;
6557 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6558 || asm_noperands (PATTERN (insn)) >= 0)
6560 /* Finish bundle containing asm insn. */
6561 curr_state->after_nops_num
6562 = 3 - curr_state->accumulated_insns_num % 3;
6563 curr_state->accumulated_insns_num
6564 += 3 - curr_state->accumulated_insns_num % 3;
6566 else if (ia64_safe_type (insn) == TYPE_L)
6567 curr_state->accumulated_insns_num++;
6569 if (ia64_safe_type (insn) == TYPE_B)
6570 curr_state->branch_deviation
6571 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6572 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6574 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6577 struct bundle_state *curr_state1;
6578 struct bundle_state *allocated_states_chain;
6580 curr_state1 = get_free_bundle_state ();
6581 dfa_state = curr_state1->dfa_state;
6582 allocated_states_chain = curr_state1->allocated_states_chain;
6583 *curr_state1 = *curr_state;
6584 curr_state1->dfa_state = dfa_state;
6585 curr_state1->allocated_states_chain = allocated_states_chain;
6586 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6588 curr_state = curr_state1;
6590 if (!try_issue_nops (curr_state,
6591 3 - curr_state->accumulated_insns_num % 3))
6593 curr_state->after_nops_num
6594 = 3 - curr_state->accumulated_insns_num % 3;
6595 curr_state->accumulated_insns_num
6596 += 3 - curr_state->accumulated_insns_num % 3;
6598 if (!insert_bundle_state (curr_state))
6599 free_bundle_state (curr_state);
6603 /* The following function returns position in the two window bundle
6610 if (cpu_unit_reservation_p (state, pos_6))
6612 else if (cpu_unit_reservation_p (state, pos_5))
6614 else if (cpu_unit_reservation_p (state, pos_4))
6616 else if (cpu_unit_reservation_p (state, pos_3))
6618 else if (cpu_unit_reservation_p (state, pos_2))
6620 else if (cpu_unit_reservation_p (state, pos_1))
6626 /* The function returns code of a possible template for given position
6627 and state. The function should be called only with 2 values of
6628 position equal to 3 or 6. */
6631 get_template (state, pos)
6638 if (cpu_unit_reservation_p (state, _0mii_))
6640 else if (cpu_unit_reservation_p (state, _0mmi_))
6642 else if (cpu_unit_reservation_p (state, _0mfi_))
6644 else if (cpu_unit_reservation_p (state, _0mmf_))
6646 else if (cpu_unit_reservation_p (state, _0bbb_))
6648 else if (cpu_unit_reservation_p (state, _0mbb_))
6650 else if (cpu_unit_reservation_p (state, _0mib_))
6652 else if (cpu_unit_reservation_p (state, _0mmb_))
6654 else if (cpu_unit_reservation_p (state, _0mfb_))
6656 else if (cpu_unit_reservation_p (state, _0mlx_))
6661 if (cpu_unit_reservation_p (state, _1mii_))
6663 else if (cpu_unit_reservation_p (state, _1mmi_))
6665 else if (cpu_unit_reservation_p (state, _1mfi_))
6667 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6669 else if (cpu_unit_reservation_p (state, _1bbb_))
6671 else if (cpu_unit_reservation_p (state, _1mbb_))
6673 else if (cpu_unit_reservation_p (state, _1mib_))
6675 else if (cpu_unit_reservation_p (state, _1mmb_))
6677 else if (cpu_unit_reservation_p (state, _1mfb_))
6679 else if (cpu_unit_reservation_p (state, _1mlx_))
6688 /* The following function returns an insn important for insn bundling
6689 followed by INSN and before TAIL. */
6692 get_next_important_insn (insn, tail)
6695 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6697 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6698 && GET_CODE (PATTERN (insn)) != USE
6699 && GET_CODE (PATTERN (insn)) != CLOBBER)
6704 /* The following function does insn bundling. Bundling algorithm is
6705 based on dynamic programming. It tries to insert different number of
6706 nop insns before/after the real insns. At the end of EBB, it chooses the
6707 best alternative and then, moving back in EBB, inserts templates for
6708 the best alternative. The algorithm is directed by information
6709 (changes of simulated processor cycle) created by the 2nd insn
6713 bundling (dump, verbose, prev_head_insn, tail)
6716 rtx prev_head_insn, tail;
6718 struct bundle_state *curr_state, *next_state, *best_state;
6719 rtx insn, next_insn;
6721 int i, bundle_end_p, only_bundle_end_p, asm_p;
6722 int pos = 0, max_pos, template0, template1;
6725 enum attr_type type;
6728 for (insn = NEXT_INSN (prev_head_insn);
6729 insn && insn != tail;
6730 insn = NEXT_INSN (insn))
6736 dfa_clean_insn_cache ();
6737 initiate_bundle_state_table ();
6738 index_to_bundle_states = xmalloc ((insn_num + 2)
6739 * sizeof (struct bundle_state *));
6740 /* First (forward) pass -- generates states. */
6741 curr_state = get_free_bundle_state ();
6742 curr_state->insn = NULL;
6743 curr_state->before_nops_num = 0;
6744 curr_state->after_nops_num = 0;
6745 curr_state->insn_num = 0;
6746 curr_state->cost = 0;
6747 curr_state->accumulated_insns_num = 0;
6748 curr_state->branch_deviation = 0;
6749 curr_state->next = NULL;
6750 curr_state->originator = NULL;
6751 state_reset (curr_state->dfa_state);
6752 index_to_bundle_states [0] = curr_state;
6754 for (insn = NEXT_INSN (prev_head_insn);
6756 insn = NEXT_INSN (insn))
6758 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6759 || GET_CODE (PATTERN (insn)) == USE
6760 || GET_CODE (PATTERN (insn)) == CLOBBER)
6761 && GET_MODE (insn) == TImode)
6763 PUT_MODE (insn, VOIDmode);
6764 for (next_insn = NEXT_INSN (insn);
6766 next_insn = NEXT_INSN (next_insn))
6767 if (INSN_P (next_insn)
6768 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6769 && GET_CODE (PATTERN (next_insn)) != USE
6770 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6772 PUT_MODE (next_insn, TImode);
6776 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6781 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6782 || GET_CODE (PATTERN (insn)) == USE
6783 || GET_CODE (PATTERN (insn)) == CLOBBER)
6785 type = ia64_safe_type (insn);
6786 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6788 index_to_bundle_states [insn_num] = NULL;
6789 for (curr_state = index_to_bundle_states [insn_num - 1];
6791 curr_state = next_state)
6793 pos = curr_state->accumulated_insns_num % 3;
6794 next_state = curr_state->next;
6795 /* Finish the current bundle in order to start a subsequent
6796 asm insn in a new bundle. */
6798 = (next_insn != NULL_RTX
6799 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6800 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6802 = (only_bundle_end_p || next_insn == NULL_RTX
6803 || (GET_MODE (next_insn) == TImode
6804 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6805 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6807 /* We need to insert 2 Nops for cases like M_MII. */
6808 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6809 && !bundle_end_p && pos == 1))
6810 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6812 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6814 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6817 if (index_to_bundle_states [insn_num] == NULL)
6819 for (curr_state = index_to_bundle_states [insn_num];
6821 curr_state = curr_state->next)
6822 if (verbose >= 2 && dump)
6826 unsigned short one_automaton_state;
6827 unsigned short oneb_automaton_state;
6828 unsigned short two_automaton_state;
6829 unsigned short twob_automaton_state;
6834 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6835 curr_state->unique_num,
6836 (curr_state->originator == NULL
6837 ? -1 : curr_state->originator->unique_num),
6839 curr_state->before_nops_num, curr_state->after_nops_num,
6840 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6841 (ia64_tune == PROCESSOR_ITANIUM
6842 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6843 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6847 if (index_to_bundle_states [insn_num] == NULL)
6849 /* Finding state with a minimal cost: */
6851 for (curr_state = index_to_bundle_states [insn_num];
6853 curr_state = curr_state->next)
6854 if (curr_state->accumulated_insns_num % 3 == 0
6855 && (best_state == NULL || best_state->cost > curr_state->cost
6856 || (best_state->cost == curr_state->cost
6857 && (curr_state->accumulated_insns_num
6858 < best_state->accumulated_insns_num
6859 || (curr_state->accumulated_insns_num
6860 == best_state->accumulated_insns_num
6861 && curr_state->branch_deviation
6862 < best_state->branch_deviation)))))
6863 best_state = curr_state;
6864 /* Second (backward) pass: adding nops and templates: */
6865 insn_num = best_state->before_nops_num;
6866 template0 = template1 = -1;
6867 for (curr_state = best_state;
6868 curr_state->originator != NULL;
6869 curr_state = curr_state->originator)
6871 insn = curr_state->insn;
6872 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6873 || asm_noperands (PATTERN (insn)) >= 0);
6875 if (verbose >= 2 && dump)
6879 unsigned short one_automaton_state;
6880 unsigned short oneb_automaton_state;
6881 unsigned short two_automaton_state;
6882 unsigned short twob_automaton_state;
6887 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6888 curr_state->unique_num,
6889 (curr_state->originator == NULL
6890 ? -1 : curr_state->originator->unique_num),
6892 curr_state->before_nops_num, curr_state->after_nops_num,
6893 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6894 (ia64_tune == PROCESSOR_ITANIUM
6895 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6896 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6899 max_pos = get_max_pos (curr_state->dfa_state);
6900 if (max_pos == 6 || (max_pos == 3 && template0 < 0))
6904 template0 = get_template (curr_state->dfa_state, 3);
6907 template1 = get_template (curr_state->dfa_state, 3);
6908 template0 = get_template (curr_state->dfa_state, 6);
6911 if (max_pos > 3 && template1 < 0)
6915 template1 = get_template (curr_state->dfa_state, 3);
6919 for (i = 0; i < curr_state->after_nops_num; i++)
6922 emit_insn_after (nop, insn);
6930 b = gen_bundle_selector (GEN_INT (template0));
6931 ia64_emit_insn_before (b, nop);
6932 template0 = template1;
6936 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6937 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6938 && asm_noperands (PATTERN (insn)) < 0)
6940 if (ia64_safe_type (insn) == TYPE_L)
6945 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6946 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6947 && asm_noperands (PATTERN (insn)) < 0)
6951 b = gen_bundle_selector (GEN_INT (template0));
6952 ia64_emit_insn_before (b, insn);
6953 b = PREV_INSN (insn);
6955 template0 = template1;
6958 for (i = 0; i < curr_state->before_nops_num; i++)
6961 ia64_emit_insn_before (nop, insn);
6962 nop = PREV_INSN (insn);
6971 b = gen_bundle_selector (GEN_INT (template0));
6972 ia64_emit_insn_before (b, insn);
6973 b = PREV_INSN (insn);
6975 template0 = template1;
6980 if (ia64_tune == PROCESSOR_ITANIUM)
6981 /* Insert additional cycles for MM-insns: */
6982 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6987 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6988 || GET_CODE (PATTERN (insn)) == USE
6989 || GET_CODE (PATTERN (insn)) == CLOBBER)
6991 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6992 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6998 last = prev_active_insn (insn);
6999 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7001 last = prev_active_insn (last);
7003 for (;; last = prev_active_insn (last))
7004 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7006 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7009 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7012 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7014 if ((pred_stop_p && n == 0) || n > 2
7015 || (template0 == 9 && n != 0))
7017 for (j = 3 - n; j > 0; j --)
7018 ia64_emit_insn_before (gen_nop (), insn);
7019 add_cycles [INSN_UID (insn)]--;
7020 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7021 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7024 add_cycles [INSN_UID (insn)]--;
7025 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7027 /* Insert .MII bundle. */
7028 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7030 ia64_emit_insn_before (gen_nop (), insn);
7031 ia64_emit_insn_before (gen_nop (), insn);
7034 ia64_emit_insn_before
7035 (gen_insn_group_barrier (GEN_INT (3)), insn);
7038 ia64_emit_insn_before (gen_nop (), insn);
7039 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7042 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7044 for (j = n; j > 0; j --)
7045 ia64_emit_insn_before (gen_nop (), insn);
7047 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7051 free (index_to_bundle_states);
7052 finish_bundle_state_table ();
7054 dfa_clean_insn_cache ();
7057 /* The following function is called at the end of scheduling BB or
7058 EBB. After reload, it inserts stop bits and does insn bundling. */
7061 ia64_sched_finish (dump, sched_verbose)
7066 fprintf (dump, "// Finishing schedule.\n");
7067 if (!reload_completed)
7069 if (reload_completed)
7071 final_emit_insn_group_barriers (dump);
7072 bundling (dump, sched_verbose, current_sched_info->prev_head,
7073 current_sched_info->next_tail);
7074 if (sched_verbose && dump)
7075 fprintf (dump, "// finishing %d-%d\n",
7076 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7077 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7083 /* The following function inserts stop bits in scheduled BB or EBB. */
7086 final_emit_insn_group_barriers (dump)
7087 FILE *dump ATTRIBUTE_UNUSED;
7090 int need_barrier_p = 0;
7091 rtx prev_insn = NULL_RTX;
7093 init_insn_group_barriers ();
7095 for (insn = NEXT_INSN (current_sched_info->prev_head);
7096 insn != current_sched_info->next_tail;
7097 insn = NEXT_INSN (insn))
7099 if (GET_CODE (insn) == BARRIER)
7101 rtx last = prev_active_insn (insn);
7105 if (GET_CODE (last) == JUMP_INSN
7106 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7107 last = prev_active_insn (last);
7108 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7109 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7111 init_insn_group_barriers ();
7113 prev_insn = NULL_RTX;
7115 else if (INSN_P (insn))
7117 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7119 init_insn_group_barriers ();
7121 prev_insn = NULL_RTX;
7123 else if (need_barrier_p || group_barrier_needed_p (insn))
7125 if (TARGET_EARLY_STOP_BITS)
7130 last != current_sched_info->prev_head;
7131 last = PREV_INSN (last))
7132 if (INSN_P (last) && GET_MODE (last) == TImode
7133 && stops_p [INSN_UID (last)])
7135 if (last == current_sched_info->prev_head)
7137 last = prev_active_insn (last);
7139 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7140 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7142 init_insn_group_barriers ();
7143 for (last = NEXT_INSN (last);
7145 last = NEXT_INSN (last))
7147 group_barrier_needed_p (last);
7151 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7153 init_insn_group_barriers ();
7155 group_barrier_needed_p (insn);
7156 prev_insn = NULL_RTX;
7158 else if (recog_memoized (insn) >= 0)
7160 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7161 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7162 || asm_noperands (PATTERN (insn)) >= 0);
7169 /* If the following function returns TRUE, we will use the the DFA
7173 ia64_use_dfa_pipeline_interface ()
7178 /* If the following function returns TRUE, we will use the the DFA
7182 ia64_first_cycle_multipass_dfa_lookahead ()
7184 return (reload_completed ? 6 : 4);
7187 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7190 ia64_init_dfa_pre_cycle_insn ()
7192 if (temp_dfa_state == NULL)
7194 dfa_state_size = state_size ();
7195 temp_dfa_state = xmalloc (dfa_state_size);
7196 prev_cycle_state = xmalloc (dfa_state_size);
7198 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7199 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7200 recog_memoized (dfa_pre_cycle_insn);
7201 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7202 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7203 recog_memoized (dfa_stop_insn);
7206 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7207 used by the DFA insn scheduler. */
7210 ia64_dfa_pre_cycle_insn ()
7212 return dfa_pre_cycle_insn;
7215 /* The following function returns TRUE if PRODUCER (of type ilog or
7216 ld) produces address for CONSUMER (of type st or stf). */
7219 ia64_st_address_bypass_p (producer, consumer)
7225 if (producer == NULL_RTX || consumer == NULL_RTX)
7227 dest = ia64_single_set (producer);
7228 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7229 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7231 if (GET_CODE (reg) == SUBREG)
7232 reg = SUBREG_REG (reg);
7233 dest = ia64_single_set (consumer);
7234 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7235 || GET_CODE (mem) != MEM)
7237 return reg_mentioned_p (reg, mem);
7240 /* The following function returns TRUE if PRODUCER (of type ilog or
7241 ld) produces address for CONSUMER (of type ld or fld). */
7244 ia64_ld_address_bypass_p (producer, consumer)
7248 rtx dest, src, reg, mem;
7250 if (producer == NULL_RTX || consumer == NULL_RTX)
7252 dest = ia64_single_set (producer);
7253 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7254 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7256 if (GET_CODE (reg) == SUBREG)
7257 reg = SUBREG_REG (reg);
7258 src = ia64_single_set (consumer);
7259 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7261 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7262 mem = XVECEXP (mem, 0, 0);
7263 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7264 mem = XEXP (mem, 0);
7266 /* Note that LO_SUM is used for GOT loads. */
7267 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7270 return reg_mentioned_p (reg, mem);
7273 /* The following function returns TRUE if INSN produces address for a
7274 load/store insn. We will place such insns into M slot because it
7275 decreases its latency time. */
7278 ia64_produce_address_p (insn)
7285 /* Emit pseudo-ops for the assembler to describe predicate relations.
7286 At present this assumes that we only consider predicate pairs to
7287 be mutex, and that the assembler can deduce proper values from
7288 straight-line code. */
7291 emit_predicate_relation_info ()
7295 FOR_EACH_BB_REVERSE (bb)
7298 rtx head = bb->head;
7300 /* We only need such notes at code labels. */
7301 if (GET_CODE (head) != CODE_LABEL)
7303 if (GET_CODE (NEXT_INSN (head)) == NOTE
7304 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7305 head = NEXT_INSN (head);
7307 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7308 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7310 rtx p = gen_rtx_REG (BImode, r);
7311 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7312 if (head == bb->end)
7318 /* Look for conditional calls that do not return, and protect predicate
7319 relations around them. Otherwise the assembler will assume the call
7320 returns, and complain about uses of call-clobbered predicates after
7322 FOR_EACH_BB_REVERSE (bb)
7324 rtx insn = bb->head;
7328 if (GET_CODE (insn) == CALL_INSN
7329 && GET_CODE (PATTERN (insn)) == COND_EXEC
7330 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7332 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7333 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7334 if (bb->head == insn)
7336 if (bb->end == insn)
7340 if (insn == bb->end)
7342 insn = NEXT_INSN (insn);
7347 /* Perform machine dependent operations on the rtl chain INSNS. */
7352 /* We are freeing block_for_insn in the toplev to keep compatibility
7353 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7354 compute_bb_for_insn ();
7356 /* If optimizing, we'll have split before scheduling. */
7358 split_all_insns (0);
7360 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7361 non-optimizing bootstrap. */
7362 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7364 if (ia64_flag_schedule_insns2)
7366 timevar_push (TV_SCHED2);
7367 ia64_final_schedule = 1;
7369 initiate_bundle_states ();
7370 ia64_nop = make_insn_raw (gen_nop ());
7371 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7372 recog_memoized (ia64_nop);
7373 clocks_length = get_max_uid () + 1;
7374 stops_p = (char *) xmalloc (clocks_length);
7375 memset (stops_p, 0, clocks_length);
7376 if (ia64_tune == PROCESSOR_ITANIUM)
7378 clocks = (int *) xmalloc (clocks_length * sizeof (int));
7379 memset (clocks, 0, clocks_length * sizeof (int));
7380 add_cycles = (int *) xmalloc (clocks_length * sizeof (int));
7381 memset (add_cycles, 0, clocks_length * sizeof (int));
7383 if (ia64_tune == PROCESSOR_ITANIUM2)
7385 pos_1 = get_cpu_unit_code ("2_1");
7386 pos_2 = get_cpu_unit_code ("2_2");
7387 pos_3 = get_cpu_unit_code ("2_3");
7388 pos_4 = get_cpu_unit_code ("2_4");
7389 pos_5 = get_cpu_unit_code ("2_5");
7390 pos_6 = get_cpu_unit_code ("2_6");
7391 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7392 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7393 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7394 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7395 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7396 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7397 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7398 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7399 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7400 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7401 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7402 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7403 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7404 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7405 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7406 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7407 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7408 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7409 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7410 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7414 pos_1 = get_cpu_unit_code ("1_1");
7415 pos_2 = get_cpu_unit_code ("1_2");
7416 pos_3 = get_cpu_unit_code ("1_3");
7417 pos_4 = get_cpu_unit_code ("1_4");
7418 pos_5 = get_cpu_unit_code ("1_5");
7419 pos_6 = get_cpu_unit_code ("1_6");
7420 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7421 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7422 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7423 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7424 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7425 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7426 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7427 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7428 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7429 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7430 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7431 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7432 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7433 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7434 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7435 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7436 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7437 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7438 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7439 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7441 schedule_ebbs (rtl_dump_file);
7442 finish_bundle_states ();
7443 if (ia64_tune == PROCESSOR_ITANIUM)
7449 emit_insn_group_barriers (rtl_dump_file);
7451 ia64_final_schedule = 0;
7452 timevar_pop (TV_SCHED2);
7455 emit_all_insn_group_barriers (rtl_dump_file);
7457 /* A call must not be the last instruction in a function, so that the
7458 return address is still within the function, so that unwinding works
7459 properly. Note that IA-64 differs from dwarf2 on this point. */
7460 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7465 insn = get_last_insn ();
7466 if (! INSN_P (insn))
7467 insn = prev_active_insn (insn);
7468 if (GET_CODE (insn) == INSN
7469 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7470 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7473 insn = prev_active_insn (insn);
7475 if (GET_CODE (insn) == CALL_INSN)
7478 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7479 emit_insn (gen_break_f ());
7480 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7485 emit_predicate_relation_info ();
7488 /* Return true if REGNO is used by the epilogue. */
7491 ia64_epilogue_uses (regno)
7497 /* With a call to a function in another module, we will write a new
7498 value to "gp". After returning from such a call, we need to make
7499 sure the function restores the original gp-value, even if the
7500 function itself does not use the gp anymore. */
7501 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7503 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7504 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7505 /* For functions defined with the syscall_linkage attribute, all
7506 input registers are marked as live at all function exits. This
7507 prevents the register allocator from using the input registers,
7508 which in turn makes it possible to restart a system call after
7509 an interrupt without having to save/restore the input registers.
7510 This also prevents kernel data from leaking to application code. */
7511 return lookup_attribute ("syscall_linkage",
7512 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7515 /* Conditional return patterns can't represent the use of `b0' as
7516 the return address, so we force the value live this way. */
7520 /* Likewise for ar.pfs, which is used by br.ret. */
7528 /* Return true if REGNO is used by the frame unwinder. */
7531 ia64_eh_uses (regno)
7534 if (! reload_completed)
7537 if (current_frame_info.reg_save_b0
7538 && regno == current_frame_info.reg_save_b0)
7540 if (current_frame_info.reg_save_pr
7541 && regno == current_frame_info.reg_save_pr)
7543 if (current_frame_info.reg_save_ar_pfs
7544 && regno == current_frame_info.reg_save_ar_pfs)
7546 if (current_frame_info.reg_save_ar_unat
7547 && regno == current_frame_info.reg_save_ar_unat)
7549 if (current_frame_info.reg_save_ar_lc
7550 && regno == current_frame_info.reg_save_ar_lc)
7556 /* Return true if this goes in small data/bss. */
7558 /* ??? We could also support own long data here. Generating movl/add/ld8
7559 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7560 code faster because there is one less load. This also includes incomplete
7561 types which can't go in sdata/sbss. */
7564 ia64_in_small_data_p (exp)
7567 if (TARGET_NO_SDATA)
7570 /* We want to merge strings, so we never consider them small data. */
7571 if (TREE_CODE (exp) == STRING_CST)
7574 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7576 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7577 if (strcmp (section, ".sdata") == 0
7578 || strcmp (section, ".sbss") == 0)
7583 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7585 /* If this is an incomplete type with size 0, then we can't put it
7586 in sdata because it might be too big when completed. */
7587 if (size > 0 && size <= ia64_section_threshold)
7594 /* Output assembly directives for prologue regions. */
7596 /* The current basic block number. */
7598 static bool last_block;
7600 /* True if we need a copy_state command at the start of the next block. */
7602 static bool need_copy_state;
7604 /* The function emits unwind directives for the start of an epilogue. */
7609 /* If this isn't the last block of the function, then we need to label the
7610 current state, and copy it back in at the start of the next block. */
7614 fprintf (asm_out_file, "\t.label_state 1\n");
7615 need_copy_state = true;
7618 fprintf (asm_out_file, "\t.restore sp\n");
7621 /* This function processes a SET pattern looking for specific patterns
7622 which result in emitting an assembly directive required for unwinding. */
7625 process_set (asm_out_file, pat)
7629 rtx src = SET_SRC (pat);
7630 rtx dest = SET_DEST (pat);
7631 int src_regno, dest_regno;
7633 /* Look for the ALLOC insn. */
7634 if (GET_CODE (src) == UNSPEC_VOLATILE
7635 && XINT (src, 1) == UNSPECV_ALLOC
7636 && GET_CODE (dest) == REG)
7638 dest_regno = REGNO (dest);
7640 /* If this isn't the final destination for ar.pfs, the alloc
7641 shouldn't have been marked frame related. */
7642 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7645 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7646 ia64_dbx_register_number (dest_regno));
7650 /* Look for SP = .... */
7651 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7653 if (GET_CODE (src) == PLUS)
7655 rtx op0 = XEXP (src, 0);
7656 rtx op1 = XEXP (src, 1);
7657 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7659 if (INTVAL (op1) < 0)
7660 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7663 process_epilogue ();
7668 else if (GET_CODE (src) == REG
7669 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7670 process_epilogue ();
7677 /* Register move we need to look at. */
7678 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7680 src_regno = REGNO (src);
7681 dest_regno = REGNO (dest);
7686 /* Saving return address pointer. */
7687 if (dest_regno != current_frame_info.reg_save_b0)
7689 fprintf (asm_out_file, "\t.save rp, r%d\n",
7690 ia64_dbx_register_number (dest_regno));
7694 if (dest_regno != current_frame_info.reg_save_pr)
7696 fprintf (asm_out_file, "\t.save pr, r%d\n",
7697 ia64_dbx_register_number (dest_regno));
7700 case AR_UNAT_REGNUM:
7701 if (dest_regno != current_frame_info.reg_save_ar_unat)
7703 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7704 ia64_dbx_register_number (dest_regno));
7708 if (dest_regno != current_frame_info.reg_save_ar_lc)
7710 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7711 ia64_dbx_register_number (dest_regno));
7714 case STACK_POINTER_REGNUM:
7715 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7716 || ! frame_pointer_needed)
7718 fprintf (asm_out_file, "\t.vframe r%d\n",
7719 ia64_dbx_register_number (dest_regno));
7723 /* Everything else should indicate being stored to memory. */
7728 /* Memory store we need to look at. */
7729 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7735 if (GET_CODE (XEXP (dest, 0)) == REG)
7737 base = XEXP (dest, 0);
7740 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7741 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7743 base = XEXP (XEXP (dest, 0), 0);
7744 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7749 if (base == hard_frame_pointer_rtx)
7751 saveop = ".savepsp";
7754 else if (base == stack_pointer_rtx)
7759 src_regno = REGNO (src);
7763 if (current_frame_info.reg_save_b0 != 0)
7765 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7769 if (current_frame_info.reg_save_pr != 0)
7771 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7775 if (current_frame_info.reg_save_ar_lc != 0)
7777 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7781 if (current_frame_info.reg_save_ar_pfs != 0)
7783 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7786 case AR_UNAT_REGNUM:
7787 if (current_frame_info.reg_save_ar_unat != 0)
7789 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7796 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7797 1 << (src_regno - GR_REG (4)));
7805 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7806 1 << (src_regno - BR_REG (1)));
7813 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7814 1 << (src_regno - FR_REG (2)));
7817 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7818 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7819 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7820 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7821 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7822 1 << (src_regno - FR_REG (12)));
7834 /* This function looks at a single insn and emits any directives
7835 required to unwind this insn. */
7837 process_for_unwind_directive (asm_out_file, insn)
7841 if (flag_unwind_tables
7842 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7846 if (GET_CODE (insn) == NOTE
7847 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7849 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7851 /* Restore unwind state from immediately before the epilogue. */
7852 if (need_copy_state)
7854 fprintf (asm_out_file, "\t.body\n");
7855 fprintf (asm_out_file, "\t.copy_state 1\n");
7856 need_copy_state = false;
7860 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7863 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7865 pat = XEXP (pat, 0);
7867 pat = PATTERN (insn);
7869 switch (GET_CODE (pat))
7872 process_set (asm_out_file, pat);
7878 int limit = XVECLEN (pat, 0);
7879 for (par_index = 0; par_index < limit; par_index++)
7881 rtx x = XVECEXP (pat, 0, par_index);
7882 if (GET_CODE (x) == SET)
7883 process_set (asm_out_file, x);
7896 ia64_init_builtins ()
7898 tree psi_type_node = build_pointer_type (integer_type_node);
7899 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7901 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7902 tree si_ftype_psi_si_si
7903 = build_function_type_list (integer_type_node,
7904 psi_type_node, integer_type_node,
7905 integer_type_node, NULL_TREE);
7907 /* __sync_val_compare_and_swap_di */
7908 tree di_ftype_pdi_di_di
7909 = build_function_type_list (long_integer_type_node,
7910 pdi_type_node, long_integer_type_node,
7911 long_integer_type_node, NULL_TREE);
7912 /* __sync_bool_compare_and_swap_di */
7913 tree si_ftype_pdi_di_di
7914 = build_function_type_list (integer_type_node,
7915 pdi_type_node, long_integer_type_node,
7916 long_integer_type_node, NULL_TREE);
7917 /* __sync_synchronize */
7918 tree void_ftype_void
7919 = build_function_type (void_type_node, void_list_node);
7921 /* __sync_lock_test_and_set_si */
7922 tree si_ftype_psi_si
7923 = build_function_type_list (integer_type_node,
7924 psi_type_node, integer_type_node, NULL_TREE);
7926 /* __sync_lock_test_and_set_di */
7927 tree di_ftype_pdi_di
7928 = build_function_type_list (long_integer_type_node,
7929 pdi_type_node, long_integer_type_node,
7932 /* __sync_lock_release_si */
7934 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7936 /* __sync_lock_release_di */
7938 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7940 #define def_builtin(name, type, code) \
7941 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7943 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7944 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7945 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7946 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7947 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7948 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7949 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7950 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7952 def_builtin ("__sync_synchronize", void_ftype_void,
7953 IA64_BUILTIN_SYNCHRONIZE);
7955 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7956 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7957 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7958 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7959 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7960 IA64_BUILTIN_LOCK_RELEASE_SI);
7961 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7962 IA64_BUILTIN_LOCK_RELEASE_DI);
7964 def_builtin ("__builtin_ia64_bsp",
7965 build_function_type (ptr_type_node, void_list_node),
7968 def_builtin ("__builtin_ia64_flushrs",
7969 build_function_type (void_type_node, void_list_node),
7970 IA64_BUILTIN_FLUSHRS);
7972 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7973 IA64_BUILTIN_FETCH_AND_ADD_SI);
7974 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7975 IA64_BUILTIN_FETCH_AND_SUB_SI);
7976 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7977 IA64_BUILTIN_FETCH_AND_OR_SI);
7978 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7979 IA64_BUILTIN_FETCH_AND_AND_SI);
7980 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7981 IA64_BUILTIN_FETCH_AND_XOR_SI);
7982 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7983 IA64_BUILTIN_FETCH_AND_NAND_SI);
7985 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7986 IA64_BUILTIN_ADD_AND_FETCH_SI);
7987 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7988 IA64_BUILTIN_SUB_AND_FETCH_SI);
7989 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7990 IA64_BUILTIN_OR_AND_FETCH_SI);
7991 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7992 IA64_BUILTIN_AND_AND_FETCH_SI);
7993 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7994 IA64_BUILTIN_XOR_AND_FETCH_SI);
7995 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7996 IA64_BUILTIN_NAND_AND_FETCH_SI);
7998 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7999 IA64_BUILTIN_FETCH_AND_ADD_DI);
8000 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8001 IA64_BUILTIN_FETCH_AND_SUB_DI);
8002 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8003 IA64_BUILTIN_FETCH_AND_OR_DI);
8004 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8005 IA64_BUILTIN_FETCH_AND_AND_DI);
8006 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8007 IA64_BUILTIN_FETCH_AND_XOR_DI);
8008 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8009 IA64_BUILTIN_FETCH_AND_NAND_DI);
8011 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8012 IA64_BUILTIN_ADD_AND_FETCH_DI);
8013 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8014 IA64_BUILTIN_SUB_AND_FETCH_DI);
8015 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8016 IA64_BUILTIN_OR_AND_FETCH_DI);
8017 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8018 IA64_BUILTIN_AND_AND_FETCH_DI);
8019 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8020 IA64_BUILTIN_XOR_AND_FETCH_DI);
8021 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8022 IA64_BUILTIN_NAND_AND_FETCH_DI);
8027 /* Expand fetch_and_op intrinsics. The basic code sequence is:
8035 cmpxchgsz.acq tmp = [ptr], tmp
8036 } while (tmp != ret)
8040 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
8042 enum machine_mode mode;
8046 rtx ret, label, tmp, ccv, insn, mem, value;
8049 arg0 = TREE_VALUE (arglist);
8050 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8051 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8052 #ifdef POINTERS_EXTEND_UNSIGNED
8053 if (GET_MODE(mem) != Pmode)
8054 mem = convert_memory_address (Pmode, mem);
8056 value = expand_expr (arg1, NULL_RTX, mode, 0);
8058 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8059 MEM_VOLATILE_P (mem) = 1;
8061 if (target && register_operand (target, mode))
8064 ret = gen_reg_rtx (mode);
8066 emit_insn (gen_mf ());
8068 /* Special case for fetchadd instructions. */
8069 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8072 insn = gen_fetchadd_acq_si (ret, mem, value);
8074 insn = gen_fetchadd_acq_di (ret, mem, value);
8079 tmp = gen_reg_rtx (mode);
8080 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8081 emit_move_insn (tmp, mem);
8083 label = gen_label_rtx ();
8085 emit_move_insn (ret, tmp);
8086 emit_move_insn (ccv, tmp);
8088 /* Perform the specific operation. Special case NAND by noticing
8089 one_cmpl_optab instead. */
8090 if (binoptab == one_cmpl_optab)
8092 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8093 binoptab = and_optab;
8095 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8098 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8100 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8103 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8108 /* Expand op_and_fetch intrinsics. The basic code sequence is:
8115 ret = tmp <op> value;
8116 cmpxchgsz.acq tmp = [ptr], ret
8117 } while (tmp != old)
8121 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
8123 enum machine_mode mode;
8127 rtx old, label, tmp, ret, ccv, insn, mem, value;
8130 arg0 = TREE_VALUE (arglist);
8131 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8132 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8133 #ifdef POINTERS_EXTEND_UNSIGNED
8134 if (GET_MODE(mem) != Pmode)
8135 mem = convert_memory_address (Pmode, mem);
8138 value = expand_expr (arg1, NULL_RTX, mode, 0);
8140 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8141 MEM_VOLATILE_P (mem) = 1;
8143 if (target && ! register_operand (target, mode))
8146 emit_insn (gen_mf ());
8147 tmp = gen_reg_rtx (mode);
8148 old = gen_reg_rtx (mode);
8149 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8151 emit_move_insn (tmp, mem);
8153 label = gen_label_rtx ();
8155 emit_move_insn (old, tmp);
8156 emit_move_insn (ccv, tmp);
8158 /* Perform the specific operation. Special case NAND by noticing
8159 one_cmpl_optab instead. */
8160 if (binoptab == one_cmpl_optab)
8162 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8163 binoptab = and_optab;
8165 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8168 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8170 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8173 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8178 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8182 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8185 For bool_ it's the same except return ret == oldval.
8189 ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8190 enum machine_mode rmode;
8191 enum machine_mode mode;
8196 tree arg0, arg1, arg2;
8197 rtx mem, old, new, ccv, tmp, insn;
8199 arg0 = TREE_VALUE (arglist);
8200 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8201 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8202 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8203 old = expand_expr (arg1, NULL_RTX, mode, 0);
8204 new = expand_expr (arg2, NULL_RTX, mode, 0);
8206 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8207 MEM_VOLATILE_P (mem) = 1;
8209 if (! register_operand (old, mode))
8210 old = copy_to_mode_reg (mode, old);
8211 if (! register_operand (new, mode))
8212 new = copy_to_mode_reg (mode, new);
8214 if (! boolp && target && register_operand (target, mode))
8217 tmp = gen_reg_rtx (mode);
8219 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8221 emit_move_insn (ccv, old);
8224 rtx ccvtmp = gen_reg_rtx (DImode);
8225 emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8226 emit_move_insn (ccv, ccvtmp);
8228 emit_insn (gen_mf ());
8230 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8232 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8238 target = gen_reg_rtx (rmode);
8239 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8245 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8248 ia64_expand_lock_test_and_set (mode, arglist, target)
8249 enum machine_mode mode;
8254 rtx mem, new, ret, insn;
8256 arg0 = TREE_VALUE (arglist);
8257 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8258 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8259 new = expand_expr (arg1, NULL_RTX, mode, 0);
8261 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8262 MEM_VOLATILE_P (mem) = 1;
8263 if (! register_operand (new, mode))
8264 new = copy_to_mode_reg (mode, new);
8266 if (target && register_operand (target, mode))
8269 ret = gen_reg_rtx (mode);
8272 insn = gen_xchgsi (ret, mem, new);
8274 insn = gen_xchgdi (ret, mem, new);
8280 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8283 ia64_expand_lock_release (mode, arglist, target)
8284 enum machine_mode mode;
8286 rtx target ATTRIBUTE_UNUSED;
8291 arg0 = TREE_VALUE (arglist);
8292 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8294 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8295 MEM_VOLATILE_P (mem) = 1;
8297 emit_move_insn (mem, const0_rtx);
8303 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8306 rtx subtarget ATTRIBUTE_UNUSED;
8307 enum machine_mode mode ATTRIBUTE_UNUSED;
8308 int ignore ATTRIBUTE_UNUSED;
8310 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8311 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8312 tree arglist = TREE_OPERAND (exp, 1);
8313 enum machine_mode rmode = VOIDmode;
8317 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8318 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8323 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8324 case IA64_BUILTIN_LOCK_RELEASE_SI:
8325 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8326 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8327 case IA64_BUILTIN_FETCH_AND_OR_SI:
8328 case IA64_BUILTIN_FETCH_AND_AND_SI:
8329 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8330 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8331 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8332 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8333 case IA64_BUILTIN_OR_AND_FETCH_SI:
8334 case IA64_BUILTIN_AND_AND_FETCH_SI:
8335 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8336 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8340 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8345 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8350 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8351 case IA64_BUILTIN_LOCK_RELEASE_DI:
8352 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8353 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8354 case IA64_BUILTIN_FETCH_AND_OR_DI:
8355 case IA64_BUILTIN_FETCH_AND_AND_DI:
8356 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8357 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8358 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8359 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8360 case IA64_BUILTIN_OR_AND_FETCH_DI:
8361 case IA64_BUILTIN_AND_AND_FETCH_DI:
8362 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8363 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8373 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8374 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8375 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8378 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8379 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8380 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8383 case IA64_BUILTIN_SYNCHRONIZE:
8384 emit_insn (gen_mf ());
8387 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8388 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8389 return ia64_expand_lock_test_and_set (mode, arglist, target);
8391 case IA64_BUILTIN_LOCK_RELEASE_SI:
8392 case IA64_BUILTIN_LOCK_RELEASE_DI:
8393 return ia64_expand_lock_release (mode, arglist, target);
8395 case IA64_BUILTIN_BSP:
8396 if (! target || ! register_operand (target, DImode))
8397 target = gen_reg_rtx (DImode);
8398 emit_insn (gen_bsp_value (target));
8399 #ifdef POINTERS_EXTEND_UNSIGNED
8400 target = convert_memory_address (ptr_mode, target);
8404 case IA64_BUILTIN_FLUSHRS:
8405 emit_insn (gen_flushrs ());
8408 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8409 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8410 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8412 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8413 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8414 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8416 case IA64_BUILTIN_FETCH_AND_OR_SI:
8417 case IA64_BUILTIN_FETCH_AND_OR_DI:
8418 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8420 case IA64_BUILTIN_FETCH_AND_AND_SI:
8421 case IA64_BUILTIN_FETCH_AND_AND_DI:
8422 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8424 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8425 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8426 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8428 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8429 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8430 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8432 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8433 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8434 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8436 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8437 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8438 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8440 case IA64_BUILTIN_OR_AND_FETCH_SI:
8441 case IA64_BUILTIN_OR_AND_FETCH_DI:
8442 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8444 case IA64_BUILTIN_AND_AND_FETCH_SI:
8445 case IA64_BUILTIN_AND_AND_FETCH_DI:
8446 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8448 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8449 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8450 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8452 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8453 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8454 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8463 /* For the HP-UX IA64 aggregate parameters are passed stored in the
8464 most significant bits of the stack slot. */
8467 ia64_hpux_function_arg_padding (mode, type)
8468 enum machine_mode mode;
8471 /* Exception to normal case for structures/unions/etc. */
8473 if (type && AGGREGATE_TYPE_P (type)
8474 && int_size_in_bytes (type) < UNITS_PER_WORD)
8477 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8478 hardwired to be true. */
8480 return((mode == BLKmode
8481 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8482 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8483 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8484 ? downward : upward);
8487 /* Linked list of all external functions that are to be emitted by GCC.
8488 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8489 order to avoid putting out names that are never really used. */
8491 struct extern_func_list
8493 struct extern_func_list *next; /* next external */
8494 char *name; /* name of the external */
8495 } *extern_func_head = 0;
8498 ia64_hpux_add_extern_decl (name)
8501 struct extern_func_list *p;
8503 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8504 p->name = xmalloc (strlen (name) + 1);
8505 strcpy(p->name, name);
8506 p->next = extern_func_head;
8507 extern_func_head = p;
8510 /* Print out the list of used global functions. */
8513 ia64_hpux_file_end ()
8515 while (extern_func_head)
8517 const char *real_name;
8520 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8521 decl = maybe_get_identifier (real_name);
8524 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8527 TREE_ASM_WRITTEN (decl) = 1;
8528 (*targetm.asm_out.globalize_label) (asm_out_file,
8529 extern_func_head->name);
8530 fputs (TYPE_ASM_OP, asm_out_file);
8531 assemble_name (asm_out_file, extern_func_head->name);
8532 putc (',', asm_out_file);
8533 fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
8534 putc ('\n', asm_out_file);
8536 extern_func_head = extern_func_head->next;
8541 /* Switch to the section to which we should output X. The only thing
8542 special we do here is to honor small data. */
8545 ia64_select_rtx_section (mode, x, align)
8546 enum machine_mode mode;
8548 unsigned HOST_WIDE_INT align;
8550 if (GET_MODE_SIZE (mode) > 0
8551 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8554 default_elf_select_rtx_section (mode, x, align);
8557 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8558 Pretend flag_pic is always set. */
8561 ia64_rwreloc_select_section (exp, reloc, align)
8564 unsigned HOST_WIDE_INT align;
8566 default_elf_select_section_1 (exp, reloc, align, true);
8570 ia64_rwreloc_unique_section (decl, reloc)
8574 default_unique_section_1 (decl, reloc, true);
8578 ia64_rwreloc_select_rtx_section (mode, x, align)
8579 enum machine_mode mode;
8581 unsigned HOST_WIDE_INT align;
8583 int save_pic = flag_pic;
8585 ia64_select_rtx_section (mode, x, align);
8586 flag_pic = save_pic;
8590 ia64_rwreloc_section_type_flags (decl, name, reloc)
8595 return default_section_type_flags_1 (decl, name, reloc, true);
8599 /* Output the assembler code for a thunk function. THUNK_DECL is the
8600 declaration for the thunk function itself, FUNCTION is the decl for
8601 the target function. DELTA is an immediate constant offset to be
8602 added to THIS. If VCALL_OFFSET is nonzero, the word at
8603 *(*this + vcall_offset) should be added to THIS. */
8606 ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8608 tree thunk ATTRIBUTE_UNUSED;
8609 HOST_WIDE_INT delta;
8610 HOST_WIDE_INT vcall_offset;
8613 rtx this, insn, funexp;
8615 reload_completed = 1;
8616 epilogue_completed = 1;
8619 /* Set things up as ia64_expand_prologue might. */
8620 last_scratch_gr_reg = 15;
8622 memset (¤t_frame_info, 0, sizeof (current_frame_info));
8623 current_frame_info.spill_cfa_off = -16;
8624 current_frame_info.n_input_regs = 1;
8625 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8627 if (!TARGET_REG_NAMES)
8628 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8630 /* Mark the end of the (empty) prologue. */
8631 emit_note (NOTE_INSN_PROLOGUE_END);
8633 this = gen_rtx_REG (Pmode, IN_REG (0));
8635 /* Apply the constant offset, if required. */
8638 rtx delta_rtx = GEN_INT (delta);
8640 if (!CONST_OK_FOR_I (delta))
8642 rtx tmp = gen_rtx_REG (Pmode, 2);
8643 emit_move_insn (tmp, delta_rtx);
8646 emit_insn (gen_adddi3 (this, this, delta_rtx));
8649 /* Apply the offset from the vtable, if required. */
8652 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8653 rtx tmp = gen_rtx_REG (Pmode, 2);
8655 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8657 if (!CONST_OK_FOR_J (vcall_offset))
8659 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8660 emit_move_insn (tmp2, vcall_offset_rtx);
8661 vcall_offset_rtx = tmp2;
8663 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8665 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8667 emit_insn (gen_adddi3 (this, this, tmp));
8670 /* Generate a tail call to the target function. */
8671 if (! TREE_USED (function))
8673 assemble_external (function);
8674 TREE_USED (function) = 1;
8676 funexp = XEXP (DECL_RTL (function), 0);
8677 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8678 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8679 insn = get_last_insn ();
8680 SIBLING_CALL_P (insn) = 1;
8682 /* Code generation for calls relies on splitting. */
8683 reload_completed = 1;
8684 epilogue_completed = 1;
8685 try_split (PATTERN (insn), insn, 0);
8689 /* Run just enough of rest_of_compilation to get the insns emitted.
8690 There's not really enough bulk here to make other passes such as
8691 instruction scheduling worth while. Note that use_thunk calls
8692 assemble_start_function and assemble_end_function. */
8694 insn_locators_initialize ();
8695 emit_all_insn_group_barriers (NULL);
8696 insn = get_insns ();
8697 shorten_branches (insn);
8698 final_start_function (insn, file, 1);
8699 final (insn, file, 1, 0);
8700 final_end_function ();
8702 reload_completed = 0;
8703 epilogue_completed = 0;
8707 #include "gt-ia64.h"