/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
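/* Note added for clarity (not in the original source): -mtls-size accepts
   14, 22 or 64, matching the immediate widths of the adds, addl and movl
   instructions respectively; 22 is the default chosen above.  */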
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int reg_save_gp;              /* save register for gp.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */
  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};
/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_use_dfa_pipeline_interface (void);
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
                                         int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (const char *name)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",           1, 1, true,  false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

call_operand (rtx op, enum machine_mode mode)
  if (mode != GET_MODE (op) && mode != VOIDmode)

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));

/* Return 1 if OP refers to a symbol in the sdata section.  */

sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  switch (GET_CODE (op))
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
      op = XEXP (XEXP (op, 0), 0);

      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return SYMBOL_REF_SMALL_ADDR_P (op);

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  switch (GET_CODE (op))
      if (GET_CODE (op) != PLUS)
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
      if (GET_CODE (op) != CONST_INT)

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

      if (SYMBOL_REF_SMALL_ADDR_P (op))

/* Return 1 if OP refers to a symbol.  */

symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  switch (GET_CODE (op))

/* Return tls_model if OP refers to a TLS symbol.  */

tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  if (GET_CODE (op) != SYMBOL_REF)
  return SYMBOL_REF_TLS_MODEL (op);

/* Return 1 if OP refers to a function.  */

function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  if (GET_CODE (op) != SYMBOL_REF)

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
      if (name[1] == '_' && name[2] == 'x')
      else if (name[1] == '_')

          && (! strcmp (name, "setjmp")
              || ! strcmp (name, "setjmp_syscall")))
          && ! strcmp (name, "sigsetjmp"))
          && ! strcmp (name, "savectx")));

  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))

/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

move_operand (rtx op, enum machine_mode mode)
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

gr_register_operand (rtx op, enum machine_mode mode)
  if (! register_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

fr_register_operand (rtx op, enum machine_mode mode)
  if (! register_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

grfr_register_operand (rtx op, enum machine_mode mode)
  if (! register_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

gr_nonimmediate_operand (rtx op, enum machine_mode mode)
  if (! nonimmediate_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

fr_nonimmediate_operand (rtx op, enum machine_mode mode)
  if (! nonimmediate_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
  if (! nonimmediate_operand (op, mode))
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);

/* Return 1 if OP is a GR register operand, or zero.  */

gr_reg_or_0_operand (rtx op, enum machine_mode mode)
  return (op == const0_rtx || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
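/* Illustrative note, added for clarity (assuming the usual ia64 constraint
   definitions, where K is a signed 8-bit immediate and L its "adjusted"
   counterpart): the intersection accepted above is [-127, 127], so a value
   such as 128, valid for L alone, is rejected because the inverted
   condition would need it as a plain 8-bit immediate.  */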
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));

/* Return 1 if OP is a 6 bit immediate operand.  */

shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);

/* Return 1 if OP is a 5 bit immediate operand.  */

shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
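/* Illustrative note, added for clarity (not in the original source): these
   are the multipliers reachable with shladd's 1-4 bit shift count; e.g. a
   (mult (reg) (const_int 8)) operand becomes a shladd with count 3, since
   1 << 3 == 8.  */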
/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4  || INTVAL (op) == -1 ||
              INTVAL (op) == 1   || INTVAL (op) == 4  ||
              INTVAL (op) == 8   || INTVAL (op) == 16));
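/* Illustrative note, added for clarity (not in the original source): these
   are exactly the increments the fetchadd4/fetchadd8 instructions encode,
   e.g. "fetchadd8.acq r8 = [r32], 16" atomically adds 16 to the location
   addressed by r32.  */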
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

destination_operand (rtx op, enum machine_mode mode)
  if (! nonimmediate_operand (op, mode))
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)

/* Like memory_operand, but don't allow post-increments.  */

not_postinc_memory_operand (rtx op, enum machine_mode mode)
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
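/* Note added for clarity (not in the original source): in this GCC's
   single-character rtx class scheme, class 'a' covers the autoincrement
   codes (PRE_INC, POST_INC, PRE_DEC, POST_DEC, PRE_MODIFY, POST_MODIFY),
   so this predicate rejects any auto-modified address, not only
   post-increments.  */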
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

normal_comparison_operator (register rtx op, enum machine_mode mode)
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

adjusted_comparison_operator (register rtx op, enum machine_mode mode)
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));

/* Return 1 if this is a signed inequality operator.  */

signed_inequality_operator (register rtx op, enum machine_mode mode)
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));

/* Return 1 if this operator is valid for predication.  */

predicate_operator (register rtx op, enum machine_mode mode)
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));

/* Return 1 if this operator can be used in a conditional operation.  */

condop_operator (register rtx op, enum machine_mode mode)
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));

/* Return 1 if this is the ar.lc register.  */

ar_lc_reg_operand (register rtx op, enum machine_mode mode)
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);

/* Return 1 if this is the ar.ccv register.  */

ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);

/* Return 1 if this is the ar.pfs register.  */

ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);

/* Like general_operand, but don't allow (mem (addressof)).  */

general_xfmode_operand (rtx op, enum machine_mode mode)
  if (! general_operand (op, mode))
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)

destination_xfmode_operand (rtx op, enum machine_mode mode)
  if (! destination_operand (op, mode))
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)

xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
  if (GET_CODE (op) == SUBREG)
  return fr_reg_or_fp01_operand (op, mode);

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

basereg_operand (rtx op, enum machine_mode mode)
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without
     them.  */

  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
  return ADDR_AREA_NORMAL;

ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
      addr_area = ADDR_AREA_SMALL;
      warning ("invalid argument of `%s' attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;

  switch (TREE_CODE (decl))
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
          && !TREE_STATIC (decl))
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl, decl);
          *no_add_attrs = true;
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
          error ("%Jaddress area of '%s' conflicts with previous "
                 "declaration", decl, decl);
          *no_add_attrs = true;
      error ("%Jaddress area attribute cannot be specified for functions",
      *no_add_attrs = true;
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;

ia64_encode_addr_area (tree decl, rtx symbol)
  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
  SYMBOL_REF_FLAGS (symbol) = flags;

ia64_encode_section_info (tree decl, rtx rtl, int first)
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));

/* Return 1 if the operands of a move are ok.  */

ia64_move_ok (rtx dst, rtx src)
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
  if (GET_CODE (src) == MEM)
  if (register_operand (src, VOIDmode))

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);

addp4_optimize_ok (rtx op1, rtx op2)
  return (basereg_operand (op1, GET_MODE (op1)) !=
          basereg_operand (op2, GET_MODE (op2)));
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

ia64_depz_field_mask (rtx rop, rtx rshift)
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
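/* Worked example, added for illustration (not in the original source):
   for rop = 0xff00 and rshift = 8, op >>= 8 leaves 0xff, and
   exact_log2 (0xff + 1) = 8, so the dep.z field is 8 bits wide.  A mask
   like 0xf0f0 shifted by 4 leaves 0xf0f, which is not of the form
   2^n - 1, and exact_log2 returns -1, rejecting the combination.  */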
/* Expand a symbolic constant load.  */

ia64_expand_load_address (rtx dest, rtx src)
  if (tls_symbolic_operand (src, VOIDmode))
  if (GET_CODE (dest) != REG)

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
  else if (TARGET_AUTO_PIC)
      emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
      emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
      emit_insn (gen_load_gprel (dest, src));

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
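      /* Worked example, added for illustration (not in the original
         source): for ofs = 0x2100, lo = ((0x2100 & 0x3fff) ^ 0x2000)
         - 0x2000 = -0x1f00 and hi = ofs - lo = 0x4000, so the low part
         stays inside the sign-extended 14-bit range [-0x2000, 0x1fff]
         and the high part is a multiple of 0x4000.  */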
      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

    case TLS_MODEL_GLOBAL_DYNAMIC:
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();

      if (GET_MODE (op0) != Pmode)
      emit_libcall_block (insns, op0, tga_ret, op1);

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        emit_insn (gen_add_dtprel (op0, tmp, op1));

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
        emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));

  if (orig_op0 == op0)
  if (GET_MODE (orig_op0) == Pmode)
  return gen_lowpart (GET_MODE (orig_op0), op0);

ia64_expand_move (rtx op0, rtx op1)
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
          ia64_expand_load_address (op0, op1);

/* Split a move from OP1 to OP0 conditional on COND.  */

ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  */

ia64_split_tmode (rtx out[2], rtx in, rtx scratch)
  switch (GET_CODE (in))
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);

        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
            out[0] = adjust_address (in, DImode, 0);
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);

            /* Since we're changing the mode, we need to change to POST_MODIFY
               as well to preserve the size of the increment.  Either that or
               do the update in two steps, but we've already got this scratch
               register handy so let's use it.  */
            base = XEXP (base, 0);
              = change_address (in, DImode,
                                (Pmode, base, plus_constant (base, 16)));
            base = XEXP (base, 0);
              = change_address (in, DImode,
                                (Pmode, base, plus_constant (base, -16)));

        if (scratch == NULL_RTX)
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);

      /* split_double does not understand how to split a TFmode
         quantity into a pair of DImode constants.  */
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

ia64_split_tmode_move (rtx operands[])
  rtx adj1, adj2, in[2], out[2], insn;

  adj1 = ia64_split_tmode (in, operands[1], operands[2]);
  adj2 = ia64_split_tmode (out, operands[0], operands[2]);

  if (reg_overlap_mentioned_p (out[0], in[1]))
  if (reg_overlap_mentioned_p (out[1], in[0]))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
  if (GET_CODE (out[first]) == MEM
      && GET_CODE (XEXP (out[first], 0)) == POST_MODIFY)
    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
                                          XEXP (XEXP (out[first], 0), 0),

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
  if (GET_CODE (out[!first]) == MEM
      && GET_CODE (XEXP (out[!first], 0)) == POST_MODIFY)
    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
                                          XEXP (XEXP (out[!first], 0), 0),

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

spill_xfmode_operand (rtx in, int force)
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
  else if (force && GET_CODE (in) == REG)
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, XFmode, copy_to_reg (XEXP (in, 0)));

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
      if ((code == NE || code == EQ) && op1 == const0_rtx)

  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (op0) == TFmode)
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
      enum rtx_code ncode;

      if (GET_MODE (op1) != TFmode)
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
          /* Relational operators raise FP_INVALID when given
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     op0, TFmode, op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,

      insns = get_insns ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, op0, op1));

      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
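/* Illustrative note, added for clarity (not in the original source): for a
   TFmode LE compare on HP-UX, the code above emits a call
   _U_Qfcmp (op0, op1, QCMP_LT|QCMP_EQ|QCMP_INV) and tests the DImode
   result with (ne ret 0), leaving a BImode predicate for the branch or
   setcc expander to consume.  */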
/* Emit the appropriate sequence for a call.  */

ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        insn = gen_sibcall_nogp (addr);
        insn = gen_call_nogp (addr, b0);
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
        insn = gen_sibcall_gp (addr);
        insn = gen_call_gp (addr, b0);
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);

    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

ia64_reload_gp (void)
  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
          tmp = hard_frame_pointer_rtx;
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);

  emit_move_insn (pic_offset_table_rtx, tmp);

ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

    insn = gen_sibcall_nogp (addr);
    insn = gen_call_value_nogp (retval, addr, retaddr);
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)

/* Begin the assembly file.  */

ia64_file_start (void)
  default_file_start ();
  emit_safe_across_calls ();

emit_safe_across_calls (void)
  unsigned int rs, re;

      while (rs < 64 && call_used_regs[PR_REG (rs)])
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
        fputc (',', asm_out_file);
        fprintf (asm_out_file, "p%u", rs);
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
    fputc ('\n', asm_out_file);
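/* Example output, added for illustration (not in the original source): for
   a typical function the call-preserved predicates are p1-p5 and p16-p63,
   so this emits

        .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers survive calls.  */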
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

find_gr_spill (int try_locals)
  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
            current_frame_info.gr_used_mask |= 1 << regno;

      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;

  /* Failed to find a general register to spill to.  Must use stack.  */

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

next_scratch_gr_reg (void)
  for (i = 0; i < 32; ++i)
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          last_scratch_gr_reg = regno;

  /* There must be _something_ available.  */

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
  unsigned int regno = REGNO (reg);
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

ia64_compute_frame_size (HOST_WIDE_INT size)
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;

  if (current_frame_info.initialized)
  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
  i = regno - OUT_REG (0) + 1;
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
        SET_HARD_REG_BIT (mask, regno);

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;

  if (! current_function_is_leaf)
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
          extra_spill_size += 8;

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
        = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
          SET_HARD_REG_BIT (mask, GR_REG (1));

      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
          SET_HARD_REG_BIT (mask, BR_REG (0));

      if (regs_ever_live[AR_PFS_REGNUM])
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
          if (current_frame_info.reg_save_ar_pfs == 0)
              extra_spill_size += 8;

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
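  /* Illustrative example, added for clarity (not in the original source):
     if find_gr_spill handed out r35 for FP, r36 for B0 and r37 for AR.PFS,
     the swap above leaves B0 in r35, AR.PFS in r36 and FP in r37, the
     RP/PFS/FP order that the unwind descriptors encode most compactly.  */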
2079 /* See if we need to store the predicate register block. */
2080 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2081 if (regs_ever_live[regno] && ! call_used_regs[regno])
2083 if (regno <= PR_REG (63))
2085 SET_HARD_REG_BIT (mask, PR_REG (0));
2086 current_frame_info.reg_save_pr = find_gr_spill (1);
2087 if (current_frame_info.reg_save_pr == 0)
2089 extra_spill_size += 8;
2093 /* ??? Mark them all as used so that register renaming and such
2094 are free to use them. */
2095 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2096 regs_ever_live[regno] = 1;
2099 /* If we're forced to use st8.spill, we're forced to save and restore
2100 ar.unat as well. The check for existing liveness allows inline asm
2101 to touch ar.unat. */
2102 if (spilled_gr_p || cfun->machine->n_varargs
2103 || regs_ever_live[AR_UNAT_REGNUM])
2105 regs_ever_live[AR_UNAT_REGNUM] = 1;
2106 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2107 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2108 if (current_frame_info.reg_save_ar_unat == 0)
2110 extra_spill_size += 8;
2115 if (regs_ever_live[AR_LC_REGNUM])
2117 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2118 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2119 if (current_frame_info.reg_save_ar_lc == 0)
2121 extra_spill_size += 8;
2126 /* If we have an odd number of words of pretend arguments written to
2127 the stack, then the FR save area will be unaligned. We round the
2128 size of this area up to keep things 16 byte aligned. */
if (spilled_fr_p)
  pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
else
  pretend_args_size = current_function_pretend_args_size;
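/* Worked example (editorial): three words (24 bytes) of pretend
   arguments round up to 32 bytes when FR registers were spilled, so
   the FR save area that follows stays 16 byte aligned.  */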
2134 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2135 + current_function_outgoing_args_size);
2136 total_size = IA64_STACK_ALIGN (total_size);
/* We always use the 16-byte scratch area provided by the caller, but
   if we are a leaf function, there's no one to whom we need to provide
   a scratch area.  */
2141 if (current_function_is_leaf)
2142 total_size = MAX (0, total_size - 16);
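/* Editorial example: a leaf function that needs just 16 bytes of
   frame can satisfy it entirely from the caller's scratch area, so
   total_size becomes 0 and no stack adjustment is emitted.  */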
2144 current_frame_info.total_size = total_size;
2145 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2146 current_frame_info.spill_size = spill_size;
2147 current_frame_info.extra_spill_size = extra_spill_size;
2148 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2149 current_frame_info.n_spilled = n_spilled;
2150 current_frame_info.initialized = reload_completed;
2153 /* Compute the initial difference between the specified pair of registers. */
2156 ia64_initial_elimination_offset (int from, int to)
2158 HOST_WIDE_INT offset;
2160 ia64_compute_frame_size (get_frame_size ());
2163 case FRAME_POINTER_REGNUM:
2164 if (to == HARD_FRAME_POINTER_REGNUM)
2166 if (current_function_is_leaf)
2167 offset = -current_frame_info.total_size;
2169 offset = -(current_frame_info.total_size
2170 - current_function_outgoing_args_size - 16);
2172 else if (to == STACK_POINTER_REGNUM)
2174 if (current_function_is_leaf)
2177 offset = 16 + current_function_outgoing_args_size;
2183 case ARG_POINTER_REGNUM:
2184 /* Arguments start above the 16 byte save area, unless stdarg
2185 in which case we store through the 16 byte save area. */
2186 if (to == HARD_FRAME_POINTER_REGNUM)
2187 offset = 16 - current_function_pretend_args_size;
2188 else if (to == STACK_POINTER_REGNUM)
2189 offset = (current_frame_info.total_size
2190 + 16 - current_function_pretend_args_size);
2202 /* If there are more than a trivial number of register spills, we use
two interleaved iterators so that we can get two memory references
per insn group.
2206 In order to simplify things in the prologue and epilogue expanders,
2207 we use helper functions to fix up the memory references after the
2208 fact with the appropriate offsets to a POST_MODIFY memory mode.
2209 The following data structure tracks the state of the two iterators
2210 while insns are being emitted. */
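/* Editorial sketch of the intended effect; the register numbers are
   made up.  With iterators r2 and r3 initialized 8 bytes apart and
   each stepping by 16, the spills come out as pairs like

       st8.spill [r2] = r34, 16
       st8.spill [r3] = r35, 16

   which can issue in the same insn group because they use distinct
   address registers.  */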
2212 struct spill_fill_data
2214 rtx init_after; /* point at which to emit initializations */
2215 rtx init_reg[2]; /* initial base register */
2216 rtx iter_reg[2]; /* the iterator registers */
2217 rtx *prev_addr[2]; /* address of last memory use */
2218 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2219 HOST_WIDE_INT prev_off[2]; /* last offset */
2220 int n_iter; /* number of iterators in use */
2221 int next_iter; /* next iterator to use */
2222 unsigned int save_gr_used_mask;
2225 static struct spill_fill_data spill_fill_data;
2228 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2232 spill_fill_data.init_after = get_last_insn ();
2233 spill_fill_data.init_reg[0] = init_reg;
2234 spill_fill_data.init_reg[1] = init_reg;
2235 spill_fill_data.prev_addr[0] = NULL;
2236 spill_fill_data.prev_addr[1] = NULL;
2237 spill_fill_data.prev_insn[0] = NULL;
2238 spill_fill_data.prev_insn[1] = NULL;
2239 spill_fill_data.prev_off[0] = cfa_off;
2240 spill_fill_data.prev_off[1] = cfa_off;
2241 spill_fill_data.next_iter = 0;
2242 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2244 spill_fill_data.n_iter = 1 + (n_spills > 2);
2245 for (i = 0; i < spill_fill_data.n_iter; ++i)
2247 int regno = next_scratch_gr_reg ();
2248 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2249 current_frame_info.gr_used_mask |= 1 << regno;
2254 finish_spill_pointers (void)
2256 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2260 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2262 int iter = spill_fill_data.next_iter;
2263 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2264 rtx disp_rtx = GEN_INT (disp);
2267 if (spill_fill_data.prev_addr[iter])
2269 if (CONST_OK_FOR_N (disp))
2271 *spill_fill_data.prev_addr[iter]
2272 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2273 gen_rtx_PLUS (DImode,
2274 spill_fill_data.iter_reg[iter],
2276 REG_NOTES (spill_fill_data.prev_insn[iter])
2277 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2278 REG_NOTES (spill_fill_data.prev_insn[iter]));
2282 /* ??? Could use register post_modify for loads. */
if (! CONST_OK_FOR_I (disp))
  {
    rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
    emit_move_insn (tmp, disp_rtx);
    disp_rtx = tmp;
  }
2289 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2290 spill_fill_data.iter_reg[iter], disp_rtx));
2293 /* Micro-optimization: if we've created a frame pointer, it's at
2294 CFA 0, which may allow the real iterator to be initialized lower,
2295 slightly increasing parallelism. Also, if there are few saves
2296 it may eliminate the iterator entirely. */
2298 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2299 && frame_pointer_needed)
2301 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2302 set_mem_alias_set (mem, get_varargs_alias_set ());
2310 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2311 spill_fill_data.init_reg[iter]);
if (! CONST_OK_FOR_I (disp))
  {
    rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
    emit_move_insn (tmp, disp_rtx);
    disp_rtx = tmp;
  }
2323 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2324 spill_fill_data.init_reg[iter],
2331 /* Careful for being the first insn in a sequence. */
2332 if (spill_fill_data.init_after)
2333 insn = emit_insn_after (seq, spill_fill_data.init_after);
2336 rtx first = get_insns ();
2338 insn = emit_insn_before (seq, first);
2340 insn = emit_insn (seq);
2342 spill_fill_data.init_after = insn;
2344 /* If DISP is 0, we may or may not have a further adjustment
2345 afterward. If we do, then the load/store insn may be modified
2346 to be a post-modify. If we don't, then this copy may be
2347 eliminated by copyprop_hardreg_forward, which makes this
2348 insn garbage, which runs afoul of the sanity check in
2349 propagate_one_insn. So mark this insn as legal to delete. */
2351 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2355 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2357 /* ??? Not all of the spills are for varargs, but some of them are.
2358 The rest of the spills belong in an alias set of their own. But
2359 it doesn't actually hurt to include them here. */
2360 set_mem_alias_set (mem, get_varargs_alias_set ());
2362 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2363 spill_fill_data.prev_off[iter] = cfa_off;
2365 if (++iter >= spill_fill_data.n_iter)
2367 spill_fill_data.next_iter = iter;
2373 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2376 int iter = spill_fill_data.next_iter;
2379 mem = spill_restore_mem (reg, cfa_off);
2380 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2381 spill_fill_data.prev_insn[iter] = insn;
2388 RTX_FRAME_RELATED_P (insn) = 1;
2390 /* Don't even pretend that the unwind code can intuit its way
2391 through a pair of interleaved post_modify iterators. Just
2392 provide the correct answer. */
2394 if (frame_pointer_needed)
2396 base = hard_frame_pointer_rtx;
2401 base = stack_pointer_rtx;
2402 off = current_frame_info.total_size - cfa_off;
2406 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2407 gen_rtx_SET (VOIDmode,
2408 gen_rtx_MEM (GET_MODE (reg),
2409 plus_constant (base, off)),
2416 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2418 int iter = spill_fill_data.next_iter;
2421 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2422 GEN_INT (cfa_off)));
2423 spill_fill_data.prev_insn[iter] = insn;
/* Wrapper functions that discard the CONST_INT spill offset.  These
2427 exist so that we can give gr_spill/gr_fill the offset they need and
2428 use a consistent function interface. */
2431 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2433 return gen_movdi (dest, src);
2437 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2439 return gen_fr_spill (dest, src);
2443 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2445 return gen_fr_restore (dest, src);
2448 /* Called after register allocation to add any instructions needed for the
prologue.  Using a prologue insn is preferred to putting all of the
instructions in output_function_prologue(), since it allows the scheduler
to intermix instructions with the saves of the caller-saved registers.  In
2452 some cases, it might be necessary to emit a barrier instruction as the last
2453 insn to prevent such scheduling.
2455 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2456 so that the debug info generation code can handle them properly.
The register save area is laid out like so:
2460 [ varargs spill area ]
2461 [ fr register spill area ]
2462 [ br register spill area ]
2463 [ ar register spill area ]
2464 [ pr register spill area ]
2465 [ gr register spill area ] */
/* ??? We get inefficient code when the frame size is larger than can fit in an
2468 adds instruction. */
2471 ia64_expand_prologue (void)
2473 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2474 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2477 ia64_compute_frame_size (get_frame_size ());
2478 last_scratch_gr_reg = 15;
2480 /* If there is no epilogue, then we don't need some prologue insns.
2481 We need to avoid emitting the dead prologue insns, because flow
2482 will complain about them. */
2487 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2488 if ((e->flags & EDGE_FAKE) == 0
&& (e->flags & EDGE_FALLTHRU) != 0)
  break;
2491 epilogue_p = (e != NULL);
2496 /* Set the local, input, and output register names. We need to do this
2497 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2498 half. If we use in/loc/out register names, then we get assembler errors
2499 in crtn.S because there is no alloc insn or regstk directive in there. */
2500 if (! TARGET_REG_NAMES)
2502 int inputs = current_frame_info.n_input_regs;
2503 int locals = current_frame_info.n_local_regs;
2504 int outputs = current_frame_info.n_output_regs;
2506 for (i = 0; i < inputs; i++)
2507 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2508 for (i = 0; i < locals; i++)
2509 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2510 for (i = 0; i < outputs; i++)
2511 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2514 /* Set the frame pointer register name. The regnum is logically loc79,
2515 but of course we'll not have allocated that many locals. Rather than
2516 worrying about renumbering the existing rtxs, we adjust the name. */
2517 /* ??? This code means that we can never use one local register when
2518 there is a frame pointer. loc79 gets wasted in this case, as it is
2519 renamed to a register that will never be used. See also the try_locals
2520 code in find_gr_spill. */
2521 if (current_frame_info.reg_fp)
2523 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2524 reg_names[HARD_FRAME_POINTER_REGNUM]
2525 = reg_names[current_frame_info.reg_fp];
2526 reg_names[current_frame_info.reg_fp] = tmp;
2529 /* We don't need an alloc instruction if we've used no outputs or locals. */
2530 if (current_frame_info.n_local_regs == 0
2531 && current_frame_info.n_output_regs == 0
2532 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2533 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2535 /* If there is no alloc, but there are input registers used, then we
2536 need a .regstk directive. */
2537 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2538 ar_pfs_save_reg = NULL_RTX;
2542 current_frame_info.need_regstk = 0;
2544 if (current_frame_info.reg_save_ar_pfs)
2545 regno = current_frame_info.reg_save_ar_pfs;
2547 regno = next_scratch_gr_reg ();
2548 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2550 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2551 GEN_INT (current_frame_info.n_input_regs),
2552 GEN_INT (current_frame_info.n_local_regs),
2553 GEN_INT (current_frame_info.n_output_regs),
2554 GEN_INT (current_frame_info.n_rotate_regs)));
2555 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2558 /* Set up frame pointer, stack pointer, and spill iterators. */
2560 n_varargs = cfun->machine->n_varargs;
2561 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2562 stack_pointer_rtx, 0);
2564 if (frame_pointer_needed)
2566 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2567 RTX_FRAME_RELATED_P (insn) = 1;
2570 if (current_frame_info.total_size != 0)
2572 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2575 if (CONST_OK_FOR_I (- current_frame_info.total_size))
offset = frame_size_rtx;
else
  {
    regno = next_scratch_gr_reg ();
    offset = gen_rtx_REG (DImode, regno);
    emit_move_insn (offset, frame_size_rtx);
  }
2584 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2585 stack_pointer_rtx, offset));
2587 if (! frame_pointer_needed)
2589 RTX_FRAME_RELATED_P (insn) = 1;
2590 if (GET_CODE (offset) != CONST_INT)
2593 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2594 gen_rtx_SET (VOIDmode,
2596 gen_rtx_PLUS (DImode,
2603 /* ??? At this point we must generate a magic insn that appears to
2604 modify the stack pointer, the frame pointer, and all spill
2605 iterators. This would allow the most scheduling freedom. For
2606 now, just hard stop. */
2607 emit_insn (gen_blockage ());
2610 /* Must copy out ar.unat before doing any integer spills. */
2611 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2613 if (current_frame_info.reg_save_ar_unat)
2615 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2618 alt_regno = next_scratch_gr_reg ();
2619 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2620 current_frame_info.gr_used_mask |= 1 << alt_regno;
2623 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2624 insn = emit_move_insn (ar_unat_save_reg, reg);
2625 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2627 /* Even if we're not going to generate an epilogue, we still
2628 need to save the register so that EH works. */
2629 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2630 emit_insn (gen_prologue_use (ar_unat_save_reg));
2633 ar_unat_save_reg = NULL_RTX;
2635 /* Spill all varargs registers. Do this before spilling any GR registers,
2636 since we want the UNAT bits for the GR registers to override the UNAT
2637 bits from varargs, which we don't care about. */
2640 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2642 reg = gen_rtx_REG (DImode, regno);
2643 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2646 /* Locate the bottom of the register save area. */
2647 cfa_off = (current_frame_info.spill_cfa_off
2648 + current_frame_info.spill_size
2649 + current_frame_info.extra_spill_size);
2651 /* Save the predicate register block either in a register or in memory. */
2652 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2654 reg = gen_rtx_REG (DImode, PR_REG (0));
2655 if (current_frame_info.reg_save_pr != 0)
2657 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2658 insn = emit_move_insn (alt_reg, reg);
2660 /* ??? Denote pr spill/fill by a DImode move that modifies all
2661 64 hard registers. */
2662 RTX_FRAME_RELATED_P (insn) = 1;
2664 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2665 gen_rtx_SET (VOIDmode, alt_reg, reg),
2668 /* Even if we're not going to generate an epilogue, we still
2669 need to save the register so that EH works. */
2671 emit_insn (gen_prologue_use (alt_reg));
2675 alt_regno = next_scratch_gr_reg ();
2676 alt_reg = gen_rtx_REG (DImode, alt_regno);
2677 insn = emit_move_insn (alt_reg, reg);
2678 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2683 /* Handle AR regs in numerical order. All of them get special handling. */
2684 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2685 && current_frame_info.reg_save_ar_unat == 0)
2687 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2688 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2692 /* The alloc insn already copied ar.pfs into a general register. The
2693 only thing we have to do now is copy that register to a stack slot
2694 if we'd not allocated a local register for the job. */
2695 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2696 && current_frame_info.reg_save_ar_pfs == 0)
2698 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2699 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2703 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2705 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2706 if (current_frame_info.reg_save_ar_lc != 0)
2708 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2709 insn = emit_move_insn (alt_reg, reg);
2710 RTX_FRAME_RELATED_P (insn) = 1;
2712 /* Even if we're not going to generate an epilogue, we still
2713 need to save the register so that EH works. */
2715 emit_insn (gen_prologue_use (alt_reg));
2719 alt_regno = next_scratch_gr_reg ();
2720 alt_reg = gen_rtx_REG (DImode, alt_regno);
2721 emit_move_insn (alt_reg, reg);
2722 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2727 if (current_frame_info.reg_save_gp)
2729 insn = emit_move_insn (gen_rtx_REG (DImode,
2730 current_frame_info.reg_save_gp),
2731 pic_offset_table_rtx);
2732 /* We don't know for sure yet if this is actually needed, since
2733 we've not split the PIC call patterns. If all of the calls
2734 are indirect, and not followed by any uses of the gp, then
2735 this save is dead. Allow it to go away. */
2737 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2740 /* We should now be at the base of the gr/br/fr spill area. */
2741 if (cfa_off != (current_frame_info.spill_cfa_off
2742 + current_frame_info.spill_size))
2745 /* Spill all general registers. */
2746 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2747 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2749 reg = gen_rtx_REG (DImode, regno);
2750 do_spill (gen_gr_spill, reg, cfa_off, reg);
2754 /* Handle BR0 specially -- it may be getting stored permanently in
2755 some GR register. */
2756 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2758 reg = gen_rtx_REG (DImode, BR_REG (0));
2759 if (current_frame_info.reg_save_b0 != 0)
2761 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2762 insn = emit_move_insn (alt_reg, reg);
2763 RTX_FRAME_RELATED_P (insn) = 1;
2765 /* Even if we're not going to generate an epilogue, we still
2766 need to save the register so that EH works. */
2768 emit_insn (gen_prologue_use (alt_reg));
2772 alt_regno = next_scratch_gr_reg ();
2773 alt_reg = gen_rtx_REG (DImode, alt_regno);
2774 emit_move_insn (alt_reg, reg);
2775 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2780 /* Spill the rest of the BR registers. */
2781 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2782 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2784 alt_regno = next_scratch_gr_reg ();
2785 alt_reg = gen_rtx_REG (DImode, alt_regno);
2786 reg = gen_rtx_REG (DImode, regno);
2787 emit_move_insn (alt_reg, reg);
2788 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2792 /* Align the frame and spill all FR registers. */
2793 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2794 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2798 reg = gen_rtx_REG (XFmode, regno);
2799 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2803 if (cfa_off != current_frame_info.spill_cfa_off)
2806 finish_spill_pointers ();
2809 /* Called after register allocation to add any instructions needed for the
2810 epilogue. Using an epilogue insn is favored compared to putting all of the
2811 instructions in output_function_prologue(), since it allows the scheduler
2812 to intermix instructions with the saves of the caller saved registers. In
2813 some cases, it might be necessary to emit a barrier instruction as the last
2814 insn to prevent such scheduling. */
2817 ia64_expand_epilogue (int sibcall_p)
2819 rtx insn, reg, alt_reg, ar_unat_save_reg;
2820 int regno, alt_regno, cfa_off;
2822 ia64_compute_frame_size (get_frame_size ());
2824 /* If there is a frame pointer, then we use it instead of the stack
2825 pointer, so that the stack pointer does not need to be valid when
2826 the epilogue starts. See EXIT_IGNORE_STACK. */
2827 if (frame_pointer_needed)
2828 setup_spill_pointers (current_frame_info.n_spilled,
2829 hard_frame_pointer_rtx, 0);
2831 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2832 current_frame_info.total_size);
2834 if (current_frame_info.total_size != 0)
2836 /* ??? At this point we must generate a magic insn that appears to
2837 modify the spill iterators and the frame pointer. This would
2838 allow the most scheduling freedom. For now, just hard stop. */
2839 emit_insn (gen_blockage ());
2842 /* Locate the bottom of the register save area. */
2843 cfa_off = (current_frame_info.spill_cfa_off
2844 + current_frame_info.spill_size
2845 + current_frame_info.extra_spill_size);
2847 /* Restore the predicate registers. */
2848 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2850 if (current_frame_info.reg_save_pr != 0)
2851 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2854 alt_regno = next_scratch_gr_reg ();
2855 alt_reg = gen_rtx_REG (DImode, alt_regno);
2856 do_restore (gen_movdi_x, alt_reg, cfa_off);
2859 reg = gen_rtx_REG (DImode, PR_REG (0));
2860 emit_move_insn (reg, alt_reg);
2863 /* Restore the application registers. */
2865 /* Load the saved unat from the stack, but do not restore it until
2866 after the GRs have been restored. */
2867 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2869 if (current_frame_info.reg_save_ar_unat != 0)
2871 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2874 alt_regno = next_scratch_gr_reg ();
2875 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2876 current_frame_info.gr_used_mask |= 1 << alt_regno;
2877 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2882 ar_unat_save_reg = NULL_RTX;
2884 if (current_frame_info.reg_save_ar_pfs != 0)
2886 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2887 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2888 emit_move_insn (reg, alt_reg);
2890 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2892 alt_regno = next_scratch_gr_reg ();
2893 alt_reg = gen_rtx_REG (DImode, alt_regno);
2894 do_restore (gen_movdi_x, alt_reg, cfa_off);
2896 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2897 emit_move_insn (reg, alt_reg);
2900 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2902 if (current_frame_info.reg_save_ar_lc != 0)
2903 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2906 alt_regno = next_scratch_gr_reg ();
2907 alt_reg = gen_rtx_REG (DImode, alt_regno);
2908 do_restore (gen_movdi_x, alt_reg, cfa_off);
2911 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2912 emit_move_insn (reg, alt_reg);
2915 /* We should now be at the base of the gr/br/fr spill area. */
2916 if (cfa_off != (current_frame_info.spill_cfa_off
2917 + current_frame_info.spill_size))
2920 /* The GP may be stored on the stack in the prologue, but it's
2921 never restored in the epilogue. Skip the stack slot. */
2922 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2925 /* Restore all general registers. */
2926 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2927 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2929 reg = gen_rtx_REG (DImode, regno);
2930 do_restore (gen_gr_restore, reg, cfa_off);
2934 /* Restore the branch registers. Handle B0 specially, as it may
2935 have gotten stored in some GR register. */
2936 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2938 if (current_frame_info.reg_save_b0 != 0)
2939 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2942 alt_regno = next_scratch_gr_reg ();
2943 alt_reg = gen_rtx_REG (DImode, alt_regno);
2944 do_restore (gen_movdi_x, alt_reg, cfa_off);
2947 reg = gen_rtx_REG (DImode, BR_REG (0));
2948 emit_move_insn (reg, alt_reg);
2951 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2952 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2954 alt_regno = next_scratch_gr_reg ();
2955 alt_reg = gen_rtx_REG (DImode, alt_regno);
2956 do_restore (gen_movdi_x, alt_reg, cfa_off);
2958 reg = gen_rtx_REG (DImode, regno);
2959 emit_move_insn (reg, alt_reg);
2962 /* Restore floating point registers. */
2963 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2964 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2968 reg = gen_rtx_REG (XFmode, regno);
2969 do_restore (gen_fr_restore_x, reg, cfa_off);
2973 /* Restore ar.unat for real. */
2974 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2976 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2977 emit_move_insn (reg, ar_unat_save_reg);
2980 if (cfa_off != current_frame_info.spill_cfa_off)
2983 finish_spill_pointers ();
2985 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2987 /* ??? At this point we must generate a magic insn that appears to
2988 modify the spill iterators, the stack pointer, and the frame
2989 pointer. This would allow the most scheduling freedom. For now,
2991 emit_insn (gen_blockage ());
2994 if (cfun->machine->ia64_eh_epilogue_sp)
2995 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2996 else if (frame_pointer_needed)
2998 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2999 RTX_FRAME_RELATED_P (insn) = 1;
3001 else if (current_frame_info.total_size)
3003 rtx offset, frame_size_rtx;
3005 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3006 if (CONST_OK_FOR_I (current_frame_info.total_size))
offset = frame_size_rtx;
else
  {
    regno = next_scratch_gr_reg ();
    offset = gen_rtx_REG (DImode, regno);
    emit_move_insn (offset, frame_size_rtx);
  }
3015 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3018 RTX_FRAME_RELATED_P (insn) = 1;
3019 if (GET_CODE (offset) != CONST_INT)
3022 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3023 gen_rtx_SET (VOIDmode,
3025 gen_rtx_PLUS (DImode,
3032 if (cfun->machine->ia64_eh_epilogue_bsp)
3033 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3036 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3039 int fp = GR_REG (2);
/* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
   first available call-clobbered register.  If there was a frame_pointer
   register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
   so we have to make sure we're using the string "r2" when emitting
   the register name for the assembler.  */
3045 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3046 fp = HARD_FRAME_POINTER_REGNUM;
3048 /* We must emit an alloc to force the input registers to become output
3049 registers. Otherwise, if the callee tries to pass its parameters
through to another call without an intervening alloc, then these
values get lost.  */
3052 /* ??? We don't need to preserve all input registers. We only need to
3053 preserve those input registers used as arguments to the sibling call.
3054 It is unclear how to compute that number here. */
3055 if (current_frame_info.n_input_regs != 0)
3056 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3057 GEN_INT (0), GEN_INT (0),
3058 GEN_INT (current_frame_info.n_input_regs),
/* Return 1 if br.ret can do all the work required to return from a
   function.  */
3067 ia64_direct_return (void)
3069 if (reload_completed && ! frame_pointer_needed)
3071 ia64_compute_frame_size (get_frame_size ());
3073 return (current_frame_info.total_size == 0
3074 && current_frame_info.n_spilled == 0
3075 && current_frame_info.reg_save_b0 == 0
3076 && current_frame_info.reg_save_pr == 0
3077 && current_frame_info.reg_save_ar_pfs == 0
3078 && current_frame_info.reg_save_ar_unat == 0
3079 && current_frame_info.reg_save_ar_lc == 0);
3084 /* Return the magic cookie that we use to hold the return address
3085 during early compilation. */
3088 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3092 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3095 /* Split this value after reload, now that we know where the return
3096 address is saved. */
3099 ia64_split_return_addr_rtx (rtx dest)
3103 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3105 if (current_frame_info.reg_save_b0 != 0)
3106 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3112 /* Compute offset from CFA for BR0. */
3113 /* ??? Must be kept in sync with ia64_expand_prologue. */
3114 off = (current_frame_info.spill_cfa_off
3115 + current_frame_info.spill_size);
3116 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3117 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3120 /* Convert CFA offset to a register based offset. */
3121 if (frame_pointer_needed)
3122 src = hard_frame_pointer_rtx;
3125 src = stack_pointer_rtx;
3126 off += current_frame_info.total_size;
3129 /* Load address into scratch register. */
3130 if (CONST_OK_FOR_I (off))
3131 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3134 emit_move_insn (dest, GEN_INT (off));
3135 emit_insn (gen_adddi3 (dest, src, dest));
3138 src = gen_rtx_MEM (Pmode, dest);
3142 src = gen_rtx_REG (DImode, BR_REG (0));
3144 emit_move_insn (dest, src);
3148 ia64_hard_regno_rename_ok (int from, int to)
3150 /* Don't clobber any of the registers we reserved for the prologue. */
3151 if (to == current_frame_info.reg_fp
3152 || to == current_frame_info.reg_save_b0
3153 || to == current_frame_info.reg_save_pr
3154 || to == current_frame_info.reg_save_ar_pfs
3155 || to == current_frame_info.reg_save_ar_unat
3156 || to == current_frame_info.reg_save_ar_lc)
3159 if (from == current_frame_info.reg_fp
3160 || from == current_frame_info.reg_save_b0
3161 || from == current_frame_info.reg_save_pr
3162 || from == current_frame_info.reg_save_ar_pfs
3163 || from == current_frame_info.reg_save_ar_unat
3164 || from == current_frame_info.reg_save_ar_lc)
3167 /* Don't use output registers outside the register frame. */
3168 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3171 /* Retain even/oddness on predicate register pairs. */
3172 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3173 return (from & 1) == (to & 1);
3178 /* Target hook for assembling integer objects. Handle word-sized
3179 aligned objects and detect the cases when @fptr is needed. */
3182 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3184 if (size == POINTER_SIZE / BITS_PER_UNIT
3186 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3187 && GET_CODE (x) == SYMBOL_REF
3188 && SYMBOL_REF_FUNCTION_P (x))
3190 if (POINTER_SIZE == 32)
3191 fputs ("\tdata4\t@fptr(", asm_out_file);
3193 fputs ("\tdata8\t@fptr(", asm_out_file);
3194 output_addr_const (asm_out_file, x);
3195 fputs (")\n", asm_out_file);
3198 return default_assemble_integer (x, size, aligned_p);
3201 /* Emit the function prologue. */
3204 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3206 int mask, grsave, grsave_prev;
3208 if (current_frame_info.need_regstk)
3209 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3210 current_frame_info.n_input_regs,
3211 current_frame_info.n_local_regs,
3212 current_frame_info.n_output_regs,
3213 current_frame_info.n_rotate_regs);
3215 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3218 /* Emit the .prologue directive. */
3221 grsave = grsave_prev = 0;
3222 if (current_frame_info.reg_save_b0 != 0)
3225 grsave = grsave_prev = current_frame_info.reg_save_b0;
3227 if (current_frame_info.reg_save_ar_pfs != 0
3228 && (grsave_prev == 0
3229 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3232 if (grsave_prev == 0)
3233 grsave = current_frame_info.reg_save_ar_pfs;
3234 grsave_prev = current_frame_info.reg_save_ar_pfs;
3236 if (current_frame_info.reg_fp != 0
3237 && (grsave_prev == 0
3238 || current_frame_info.reg_fp == grsave_prev + 1))
3241 if (grsave_prev == 0)
3242 grsave = HARD_FRAME_POINTER_REGNUM;
3243 grsave_prev = current_frame_info.reg_fp;
3245 if (current_frame_info.reg_save_pr != 0
3246 && (grsave_prev == 0
3247 || current_frame_info.reg_save_pr == grsave_prev + 1))
3250 if (grsave_prev == 0)
3251 grsave = current_frame_info.reg_save_pr;
3254 if (mask && TARGET_GNU_AS)
3255 fprintf (file, "\t.prologue %d, %d\n", mask,
3256 ia64_dbx_register_number (grsave));
3258 fputs ("\t.prologue\n", file);
3260 /* Emit a .spill directive, if necessary, to relocate the base of
3261 the register spill area. */
3262 if (current_frame_info.spill_cfa_off != -16)
3263 fprintf (file, "\t.spill %ld\n",
3264 (long) (current_frame_info.spill_cfa_off
3265 + current_frame_info.spill_size));
3268 /* Emit the .body directive at the scheduled end of the prologue. */
3271 ia64_output_function_end_prologue (FILE *file)
3273 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3276 fputs ("\t.body\n", file);
3279 /* Emit the function epilogue. */
3282 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3283 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3287 if (current_frame_info.reg_fp)
3289 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3290 reg_names[HARD_FRAME_POINTER_REGNUM]
3291 = reg_names[current_frame_info.reg_fp];
3292 reg_names[current_frame_info.reg_fp] = tmp;
3294 if (! TARGET_REG_NAMES)
3296 for (i = 0; i < current_frame_info.n_input_regs; i++)
3297 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3298 for (i = 0; i < current_frame_info.n_local_regs; i++)
3299 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3300 for (i = 0; i < current_frame_info.n_output_regs; i++)
3301 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3304 current_frame_info.initialized = 0;
3308 ia64_dbx_register_number (int regno)
3310 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3311 from its home at loc79 to something inside the register frame. We
3312 must perform the same renumbering here for the debug info. */
3313 if (current_frame_info.reg_fp)
3315 if (regno == HARD_FRAME_POINTER_REGNUM)
3316 regno = current_frame_info.reg_fp;
3317 else if (regno == current_frame_info.reg_fp)
3318 regno = HARD_FRAME_POINTER_REGNUM;
3321 if (IN_REGNO_P (regno))
3322 return 32 + regno - IN_REG (0);
3323 else if (LOC_REGNO_P (regno))
3324 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3325 else if (OUT_REGNO_P (regno))
3326 return (32 + current_frame_info.n_input_regs
3327 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3333 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3335 rtx addr_reg, eight = GEN_INT (8);
3337 /* The Intel assembler requires that the global __ia64_trampoline symbol
be declared explicitly.  */
3341 static bool declared_ia64_trampoline = false;
3343 if (!declared_ia64_trampoline)
3345 declared_ia64_trampoline = true;
3346 (*targetm.asm_out.globalize_label) (asm_out_file,
3347 "__ia64_trampoline");
3351 /* Load up our iterator. */
3352 addr_reg = gen_reg_rtx (Pmode);
3353 emit_move_insn (addr_reg, addr);
3355 /* The first two words are the fake descriptor:
3356 __ia64_trampoline, ADDR+16. */
3357 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3358 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3359 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3361 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3362 copy_to_reg (plus_constant (addr, 16)));
3363 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3365 /* The third word is the target descriptor. */
3366 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3367 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3369 /* The fourth word is the static chain. */
3370 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
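/* Editorial summary of the 32-byte trampoline written above:

       [ADDR+ 0]  __ia64_trampoline  \  fake function
       [ADDR+ 8]  ADDR+16            /  descriptor
       [ADDR+16]  FNADDR
       [ADDR+24]  STATIC_CHAIN

   A call through the fake descriptor enters __ia64_trampoline with gp
   pointing at ADDR+16, from which it loads the real target address
   and the static chain.  */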
3373 /* Do any needed setup for a variadic function. CUM has not been updated
for the last named argument, which has type TYPE and mode MODE.
3376 We generate the actual spill instructions during prologue generation. */
ia64_setup_incoming_varargs (CUMULATIVE_ARGS cum, int int_mode, tree type,
                             int * pretend_size,
                             int second_time ATTRIBUTE_UNUSED)
3383 /* Skip the current argument. */
3384 ia64_function_arg_advance (&cum, int_mode, type, 1);
3386 if (cum.words < MAX_ARGUMENT_SLOTS)
3388 int n = MAX_ARGUMENT_SLOTS - cum.words;
3389 *pretend_size = n * UNITS_PER_WORD;
3390 cfun->machine->n_varargs = n;
3394 /* Check whether TYPE is a homogeneous floating point aggregate. If
3395 it is, return the mode of the floating point type that appears
in all leaves.  If it is not, return VOIDmode.

An aggregate is a homogeneous floating point aggregate if all
fields/elements in it have the same floating point type (e.g.,
SFmode).  128-bit quad-precision floats are excluded.  */
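/* Editorial examples of this definition:

       struct { float x, y, z; }          HFA, element mode SFmode
       struct { double d; double v[2]; }  HFA, element mode DFmode
       struct { float x; double y; }      not an HFA (leaf types differ)  */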
3402 static enum machine_mode
3403 hfa_element_mode (tree type, int nested)
3405 enum machine_mode element_mode = VOIDmode;
3406 enum machine_mode mode;
3407 enum tree_code code = TREE_CODE (type);
3408 int know_element_mode = 0;
3413 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3414 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3415 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3416 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3420 /* Fortran complex types are supposed to be HFAs, so we need to handle
gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
types though.  */
3424 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3425 && TYPE_MODE (type) != TCmode)
3426 return GET_MODE_INNER (TYPE_MODE (type));
3431 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3432 mode if this is contained within an aggregate. */
3433 if (nested && TYPE_MODE (type) != TFmode)
3434 return TYPE_MODE (type);
3439 return hfa_element_mode (TREE_TYPE (type), 1);
3443 case QUAL_UNION_TYPE:
3444 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3446 if (TREE_CODE (t) != FIELD_DECL)
3449 mode = hfa_element_mode (TREE_TYPE (t), 1);
3450 if (know_element_mode)
3452 if (mode != element_mode)
3455 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3459 know_element_mode = 1;
3460 element_mode = mode;
3463 return element_mode;
3466 /* If we reach here, we probably have some front-end specific type
3467 that the backend doesn't know about. This can happen via the
3468 aggregate_value_p call in init_function_start. All we can do is
3469 ignore unknown tree types. */
3476 /* Return the number of words required to hold a quantity of TYPE and MODE
3477 when passed as an argument. */
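/* Editorial example: a 12-byte BLKmode aggregate needs
   (12 + 8 - 1) / 8 = 2 argument words.  */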
3479 ia64_function_arg_words (tree type, enum machine_mode mode)
3483 if (mode == BLKmode)
3484 words = int_size_in_bytes (type);
3486 words = GET_MODE_SIZE (mode);
3488 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3491 /* Return the number of registers that should be skipped so the current
3492 argument (described by TYPE and WORDS) will be properly aligned.
3494 Integer and float arguments larger than 8 bytes start at the next
3495 even boundary. Aggregates larger than 8 bytes start at the next
3496 even boundary if the aggregate has 16 byte alignment. Note that
3497 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3498 but are still to be aligned in registers.
3500 ??? The ABI does not specify how to handle aggregates with
3501 alignment from 9 to 15 bytes, or greater than 16. We handle them
3502 all as if they had 16 byte alignment. Such aggregates can occur
3503 only if gcc extensions are used. */
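/* Editorial example: if cum->words is 3 (an odd slot) and the next
   argument is a 16-byte aligned aggregate, one slot is skipped so the
   argument starts at the even slot 4, and this function returns 1.  */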
3505 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3507 if ((cum->words & 1) == 0)
3511 && TREE_CODE (type) != INTEGER_TYPE
3512 && TREE_CODE (type) != REAL_TYPE)
3513 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */
3524 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3525 int named, int incoming)
3527 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3528 int words = ia64_function_arg_words (type, mode);
3529 int offset = ia64_function_arg_offset (cum, type, words);
3530 enum machine_mode hfa_mode = VOIDmode;
3532 /* If all argument slots are used, then it must go on the stack. */
3533 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3536 /* Check for and handle homogeneous FP aggregates. */
3538 hfa_mode = hfa_element_mode (type, 0);
3540 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3541 and unprototyped hfas are passed specially. */
3542 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3546 int fp_regs = cum->fp_regs;
3547 int int_regs = cum->words + offset;
3548 int hfa_size = GET_MODE_SIZE (hfa_mode);
3552 /* If prototyped, pass it in FR regs then GR regs.
3553 If not prototyped, pass it in both FR and GR regs.
3555 If this is an SFmode aggregate, then it is possible to run out of
3556 FR regs while GR regs are still left. In that case, we pass the
3557 remaining part in the GR regs. */
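/* Editorial example: a named, prototyped struct of three floats (an
   SFmode HFA) arriving with fp_regs == 0 is passed in f8, f9 and f10,
   one 4-byte element per FR register.  */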
3559 /* Fill the FP regs. We do this always. We stop if we reach the end
3560 of the argument, the last FP register, or the last argument slot. */
3562 byte_size = ((mode == BLKmode)
3563 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3564 args_byte_size = int_regs * UNITS_PER_WORD;
3566 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3567 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3569 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3570 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3574 args_byte_size += hfa_size;
3578 /* If no prototype, then the whole thing must go in GR regs. */
3579 if (! cum->prototype)
3581 /* If this is an SFmode aggregate, then we might have some left over
3582 that needs to go in GR regs. */
3583 else if (byte_size != offset)
3584 int_regs += offset / UNITS_PER_WORD;
3586 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3588 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3590 enum machine_mode gr_mode = DImode;
3592 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3593 then this goes in a GR reg left adjusted/little endian, right
3594 adjusted/big endian. */
3595 /* ??? Currently this is handled wrong, because 4-byte hunks are
3596 always right adjusted/little endian. */
3599 /* If we have an even 4 byte hunk because the aggregate is a
3600 multiple of 4 bytes in size, then this goes in a GR reg right
3601 adjusted/little endian. */
3602 else if (byte_size - offset == 4)
3604 /* Complex floats need to have float mode. */
3605 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3608 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3609 gen_rtx_REG (gr_mode, (basereg
3612 offset += GET_MODE_SIZE (gr_mode);
3613 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3614 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3617 /* If we ended up using just one location, just return that one loc, but
3618 change the mode back to the argument mode. */
3620 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3622 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
/* Integral values and aggregates go in general registers.  If we have run
   out of FR registers, then FP values must also go in general registers.
   This can happen when we have an SFmode HFA.  */
3628 else if (mode == TFmode || mode == TCmode
3629 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3631 int byte_size = ((mode == BLKmode)
3632 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3633 if (BYTES_BIG_ENDIAN
3634 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3635 && byte_size < UNITS_PER_WORD
3638 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3639 gen_rtx_REG (DImode,
3640 (basereg + cum->words
3643 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3646 return gen_rtx_REG (mode, basereg + cum->words + offset);
3650 /* If there is a prototype, then FP values go in a FR register when
3651 named, and in a GR register when unnamed. */
3652 else if (cum->prototype)
3655 return gen_rtx_REG (mode, basereg + cum->words + offset);
3657 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
/* If there is no prototype, then FP values go in both FR and GR
   registers.  */
3663 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3664 gen_rtx_REG (mode, (FR_ARG_FIRST
3667 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3669 (basereg + cum->words
3673 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   on the stack.  */
3682 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3683 tree type, int named ATTRIBUTE_UNUSED)
3685 int words = ia64_function_arg_words (type, mode);
3686 int offset = ia64_function_arg_offset (cum, type, words);
3688 /* If all argument slots are used, then it must go on the stack. */
3689 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3692 /* It doesn't matter whether the argument goes in FR or GR regs. If
3693 it fits within the 8 argument slots, then it goes entirely in
3694 registers. If it extends past the last argument slot, then the rest
3695 goes on the stack. */
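/* Editorial example: with six of the eight slots already used, a
   four-word argument gets two words in registers and two on the
   stack, so 2 is returned.  */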
3697 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3700 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3703 /* Update CUM to point after this argument. This is patterned after
3704 ia64_function_arg. */
3707 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3708 tree type, int named)
3710 int words = ia64_function_arg_words (type, mode);
3711 int offset = ia64_function_arg_offset (cum, type, words);
3712 enum machine_mode hfa_mode = VOIDmode;
3714 /* If all arg slots are already full, then there is nothing to do. */
3715 if (cum->words >= MAX_ARGUMENT_SLOTS)
3718 cum->words += words + offset;
3720 /* Check for and handle homogeneous FP aggregates. */
3722 hfa_mode = hfa_element_mode (type, 0);
3724 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3725 and unprototyped hfas are passed specially. */
3726 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3728 int fp_regs = cum->fp_regs;
3729 /* This is the original value of cum->words + offset. */
3730 int int_regs = cum->words - words;
3731 int hfa_size = GET_MODE_SIZE (hfa_mode);
3735 /* If prototyped, pass it in FR regs then GR regs.
3736 If not prototyped, pass it in both FR and GR regs.
3738 If this is an SFmode aggregate, then it is possible to run out of
3739 FR regs while GR regs are still left. In that case, we pass the
3740 remaining part in the GR regs. */
3742 /* Fill the FP regs. We do this always. We stop if we reach the end
3743 of the argument, the last FP register, or the last argument slot. */
3745 byte_size = ((mode == BLKmode)
3746 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3747 args_byte_size = int_regs * UNITS_PER_WORD;
3749 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3750 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3753 args_byte_size += hfa_size;
3757 cum->fp_regs = fp_regs;
/* Integral values and aggregates go in general registers.  If we have run
   out of FR registers, then FP values must also go in general registers.
   This can happen when we have an SFmode HFA.  */
3763 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3764 cum->int_regs = cum->words;
3766 /* If there is a prototype, then FP values go in a FR register when
3767 named, and in a GR register when unnamed. */
3768 else if (cum->prototype)
3771 cum->int_regs = cum->words;
3773 /* ??? Complex types should not reach here. */
3774 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
/* If there is no prototype, then FP values go in both FR and GR
   registers.  */
3780 /* ??? Complex types should not reach here. */
3781 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3782 cum->int_regs = cum->words;
3786 /* Variable sized types are passed by reference. */
3787 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3790 ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3791 enum machine_mode mode ATTRIBUTE_UNUSED,
3792 tree type, int named ATTRIBUTE_UNUSED)
3794 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3797 /* True if it is OK to do sibling call optimization for the specified
3798 call expression EXP. DECL will be the called function, or NULL if
3799 this is an indirect call. */
3801 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3803 /* We must always return with our current GP. This means we can
3804 only sibcall to functions defined in the current module. */
3805 return decl && (*targetm.binds_local_p) (decl);
3809 /* Implement va_arg. */
3812 ia64_va_arg (tree valist, tree type)
3816 /* Variable sized types are passed by reference. */
3817 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3819 rtx addr = force_reg (ptr_mode,
3820 std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3821 #ifdef POINTERS_EXTEND_UNSIGNED
3822 addr = convert_memory_address (Pmode, addr);
3824 return gen_rtx_MEM (ptr_mode, addr);
3827 /* Aggregate arguments with alignment larger than 8 bytes start at
3828 the next even boundary. Integer and floating point arguments
3829 do so if they are larger than 8 bytes, whether or not they are
3830 also aligned larger than 8 bytes. */
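/* Editorial example: for a 16-byte aligned aggregate, the code below
   rounds valist up to the next 16-byte boundary before the standard
   expansion computes the argument address.  */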
3831 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3832 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3834 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3835 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3836 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3837 build_int_2 (-2 * UNITS_PER_WORD, -1));
3838 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3839 TREE_SIDE_EFFECTS (t) = 1;
3840 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3843 return std_expand_builtin_va_arg (valist, type);
/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */
3852 enum machine_mode mode;
3853 enum machine_mode hfa_mode;
3854 HOST_WIDE_INT byte_size;
3856 mode = TYPE_MODE (valtype);
3857 byte_size = GET_MODE_SIZE (mode);
3858 if (mode == BLKmode)
3860 byte_size = int_size_in_bytes (valtype);
3865 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3867 hfa_mode = hfa_element_mode (valtype, 0);
3868 if (hfa_mode != VOIDmode)
3870 int hfa_size = GET_MODE_SIZE (hfa_mode);
3872 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3877 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3883 /* Return rtx for register that holds the function return value. */
3886 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3888 enum machine_mode mode;
3889 enum machine_mode hfa_mode;
3891 mode = TYPE_MODE (valtype);
3892 hfa_mode = hfa_element_mode (valtype, 0);
3894 if (hfa_mode != VOIDmode)
3902 hfa_size = GET_MODE_SIZE (hfa_mode);
3903 byte_size = ((mode == BLKmode)
3904 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3906 for (i = 0; offset < byte_size; i++)
3908 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3909 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3915 return XEXP (loc[0], 0);
3917 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3919 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3920 return gen_rtx_REG (mode, FR_ARG_FIRST);
3923 if (BYTES_BIG_ENDIAN
3924 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3932 bytesize = int_size_in_bytes (valtype);
3933 for (i = 0; offset < bytesize; i++)
3935 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3936 gen_rtx_REG (DImode,
3939 offset += UNITS_PER_WORD;
3941 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3944 return gen_rtx_REG (mode, GR_RET_FIRST);
3948 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3949 We need to emit DTP-relative relocations. */
3952 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3956 fputs ("\tdata8.ua\t@dtprel(", file);
3957 output_addr_const (file, x);
3961 /* Print a memory address as an operand to reference that memory location. */
3963 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3964 also call this from ia64_print_operand for memory addresses. */
3967 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3968 rtx address ATTRIBUTE_UNUSED)
3972 /* Print an operand to an assembler instruction.
3973 C Swap and print a comparison operator.
3974 D Print an FP comparison operator.
3975 E Print 32 - constant, for SImode shifts as extract.
3976 e Print 64 - constant, for DImode rotates.
3977 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3978 a floating point register emitted normally.
3979 I Invert a predicate register by adding 1.
3980 J Select the proper predicate register for a condition.
3981 j Select the inverse predicate register for a condition.
3982 O Append .acq for volatile load.
3983 P Postincrement of a MEM.
3984 Q Append .rel for volatile store.
3985 S Shift amount for shladd instruction.
3986 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3987 for Intel assembler.
3988 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3989 for Intel assembler.
r Print register name, or constant 0 as r0.  HP compatibility for
   Linux kernel.  */
3993 ia64_print_operand (FILE * file, rtx x, int code)
4000 /* Handled below. */
4005 enum rtx_code c = swap_condition (GET_CODE (x));
4006 fputs (GET_RTX_NAME (c), file);
4011 switch (GET_CODE (x))
4023 str = GET_RTX_NAME (GET_CODE (x));
4030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4034 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4038 if (x == CONST0_RTX (GET_MODE (x)))
4039 str = reg_names [FR_REG (0)];
4040 else if (x == CONST1_RTX (GET_MODE (x)))
4041 str = reg_names [FR_REG (1)];
4042 else if (GET_CODE (x) == REG)
4043 str = reg_names [REGNO (x)];
4050 fputs (reg_names [REGNO (x) + 1], file);
4056 unsigned int regno = REGNO (XEXP (x, 0));
4057 if (GET_CODE (x) == EQ)
4061 fputs (reg_names [regno], file);
4066 if (MEM_VOLATILE_P (x))
4067 fputs(".acq", file);
4072 HOST_WIDE_INT value;
4074 switch (GET_CODE (XEXP (x, 0)))
4080 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4081 if (GET_CODE (x) == CONST_INT)
4083 else if (GET_CODE (x) == REG)
4085 fprintf (file, ", %s", reg_names[REGNO (x)]);
4093 value = GET_MODE_SIZE (GET_MODE (x));
4097 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4101 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4106 if (MEM_VOLATILE_P (x))
4107 fputs(".rel", file);
4111 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4115 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4117 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4123 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4125 const char *prefix = "0x";
4126 if (INTVAL (x) & 0x80000000)
4128 fprintf (file, "0xffffffff");
4131 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4137 /* If this operand is the constant zero, write it as register zero.
4138 Any register, zero, or CONST_INT value is OK here. */
4139 if (GET_CODE (x) == REG)
4140 fputs (reg_names[REGNO (x)], file);
4141 else if (x == CONST0_RTX (GET_MODE (x)))
4143 else if (GET_CODE (x) == CONST_INT)
4144 output_addr_const (file, x);
4146 output_operand_lossage ("invalid %%r value");
4153 /* For conditional branches, returns or calls, substitute
4154 sptk, dptk, dpnt, or spnt for %s. */
4155 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4158 int pred_val = INTVAL (XEXP (x, 0));
4160 /* Guess top and bottom 10% statically predicted. */
4161 if (pred_val < REG_BR_PROB_BASE / 50)
4163 else if (pred_val < REG_BR_PROB_BASE / 2)
4165 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4170 else if (GET_CODE (current_output_insn) == CALL_INSN)
4175 fputs (which, file);
4180 x = current_insn_predicate;
4183 unsigned int regno = REGNO (XEXP (x, 0));
4184 if (GET_CODE (x) == EQ)
4186 fprintf (file, "(%s) ", reg_names [regno]);
4191 output_operand_lossage ("ia64_print_operand: unknown code");
4195 switch (GET_CODE (x))
4197 /* This happens for the spill/restore instructions. */
4202 /* ... fall through ... */
4205 fputs (reg_names [REGNO (x)], file);
4210 rtx addr = XEXP (x, 0);
4211 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4212 addr = XEXP (addr, 0);
4213 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4218 output_addr_const (file, x);
4225 /* Compute a (partial) cost for rtx X. Return true if the complete
4226 cost has been computed, and false if subexpressions should be
4227 scanned. In either case, *TOTAL contains the cost result. */
4228 /* ??? This is incomplete. */
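/* COSTS_N_INSNS (N) expresses a cost of roughly N instructions in the
middle end's cost units; e.g. the MULT case below charges about ten
insns for an integer multiply that must round-trip through the FPU. */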
4231 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4239 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4242 if (CONST_OK_FOR_I (INTVAL (x)))
4244 else if (CONST_OK_FOR_J (INTVAL (x)))
4247 *total = COSTS_N_INSNS (1);
4250 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4253 *total = COSTS_N_INSNS (1);
4258 *total = COSTS_N_INSNS (1);
4264 *total = COSTS_N_INSNS (3);
4268 /* For multiplies wider than HImode, we have to go to the FPU,
4269 which normally involves copies. Plus there's the latency
4270 of the multiply itself, and the latency of the instructions to
4271 transfer integer regs to FP regs. */
4272 /* ??? Check for FP mode. */
4273 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4274 *total = COSTS_N_INSNS (10);
4276 *total = COSTS_N_INSNS (2);
4284 *total = COSTS_N_INSNS (1);
4291 /* We make divide expensive, so that divide-by-constant will be
4292 optimized to a multiply. */
4293 *total = COSTS_N_INSNS (60);
4301 /* Calculate the cost of moving data from a register in class FROM to
4302 one in class TO, using MODE. */
4305 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4308 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4309 if (to == ADDL_REGS)
4311 if (from == ADDL_REGS)
4314 /* All costs are symmetric, so reduce cases by putting the
lower-numbered class as the destination. */
4318 enum reg_class tmp = to;
4319 to = from, from = tmp;
4322 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4323 so that we get secondary memory reloads. Between FR_REGS,
4324 we have to make this at least as expensive as MEMORY_MOVE_COST
4325 to avoid spectacularly poor register class preferencing. */
4328 if (to != GR_REGS || from != GR_REGS)
4329 return MEMORY_MOVE_COST (mode, to, 0);
4337 /* Moving between PR registers takes two insns. */
4338 if (from == PR_REGS)
4340 /* Moving between PR and anything but GR is impossible. */
4341 if (from != GR_REGS)
4342 return MEMORY_MOVE_COST (mode, to, 0);
4346 /* Moving between BR and anything but GR is impossible. */
4347 if (from != GR_REGS && from != GR_AND_BR_REGS)
4348 return MEMORY_MOVE_COST (mode, to, 0);
4353 /* Moving between AR and anything but GR is impossible. */
4354 if (from != GR_REGS)
4355 return MEMORY_MOVE_COST (mode, to, 0);
4360 case GR_AND_FR_REGS:
4361 case GR_AND_BR_REGS:
4372 /* This function returns the register class required for a secondary
4373 register when copying between one of the registers in CLASS, and X,
using MODE. A return value of NO_REGS means that no secondary register
is required. */
4378 ia64_secondary_reload_class (enum reg_class class,
4379 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4383 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4384 regno = true_regnum (x);
4391 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4392 interaction. We end up with two pseudos with overlapping lifetimes
4393 both of which are equiv to the same constant, and both which need
4394 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4395 changes depending on the path length, which means the qty_first_reg
4396 check in make_regs_eqv can give different answers at different times.
4397 At some point I'll probably need a reload_indi pattern to handle
4400 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
wound up with an FP register from GR_AND_FR_REGS. Extend that to all
4402 non-general registers for good measure. */
4403 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4406 /* This is needed if a pseudo used as a call_operand gets spilled to a
4408 if (GET_CODE (x) == MEM)
4413 /* Need to go through general registers to get to other class regs. */
4414 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4417 /* This can happen when a paradoxical subreg is an operand to the
4419 /* ??? This shouldn't be necessary after instruction scheduling is
4420 enabled, because paradoxical subregs are not accepted by
4421 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4422 stop the paradoxical subreg stupidity in the *_operand functions
4424 if (GET_CODE (x) == MEM
4425 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4426 || GET_MODE (x) == QImode))
4429 /* This can happen because of the ior/and/etc patterns that accept FP
4430 registers as operands. If the third operand is a constant, then it
needs to be reloaded into an FP register. */
4432 if (GET_CODE (x) == CONST_INT)
4435 /* This can happen because of register elimination in a muldi3 insn.
4436 E.g. `26107 * (unsigned long)&u'. */
4437 if (GET_CODE (x) == PLUS)
4442 /* ??? This happens if we cse/gcse a BImode value across a call,
4443 and the function has a nonlocal goto. This is because global
4444 does not allocate call crossing pseudos to hard registers when
4445 current_function_has_nonlocal_goto is true. This is relatively
4446 common for C++ programs that use exceptions. To reproduce,
4447 return NO_REGS and compile libstdc++. */
4448 if (GET_CODE (x) == MEM)
4451 /* This can happen when we take a BImode subreg of a DImode value,
4452 and that DImode value winds up in some non-GR register. */
4453 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4458 /* Since we have no offsettable memory addresses, we need a temporary
4459 to hold the address of the second word. */
4460 if (mode == TImode || mode == TFmode)
4472 /* Emit text to declare externally defined variables and functions, because
4473 the Intel assembler does not support undefined externals. */
4476 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4478 int save_referenced;
4480 /* GNU as does not need anything here, but the HP linker does need
4481 something for external functions. */
4485 || TREE_CODE (decl) != FUNCTION_DECL
4486 || strstr (name, "__builtin_") == name))
4489 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4490 the linker when we do this, so we need to be careful not to do this for
4491 builtin functions which have no library equivalent. Unfortunately, we
4492 can't tell here whether or not a function will actually be called by
expand_expr, so we pull in library functions even if we may not need
them. */
4495 if (! strcmp (name, "__builtin_next_arg")
4496 || ! strcmp (name, "alloca")
4497 || ! strcmp (name, "__builtin_constant_p")
4498 || ! strcmp (name, "__builtin_args_info"))
4502 ia64_hpux_add_extern_decl (name);
4505 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4507 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4508 if (TREE_CODE (decl) == FUNCTION_DECL)
4509 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4510 (*targetm.asm_out.globalize_label) (file, name);
4511 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4515 /* Parse the -mfixed-range= option string. */
4518 fix_range (const char *const_str)
4521 char *str, *dash, *comma;
/* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
REG2 are either register names or register numbers. The effect
4525 of this option is to mark the registers in the range from REG1 to
4526 REG2 as ``fixed'' so they won't be used by the compiler. This is
4527 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
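/* For example, "-mfixed-range=f32-f127" marks registers f32 through
f127 as fixed, and several ranges may be given separated by commas,
as in "-mfixed-range=f12-f15,f32-f127" (illustrative values). */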
4529 i = strlen (const_str);
4530 str = (char *) alloca (i + 1);
4531 memcpy (str, const_str, i + 1);
4535 dash = strchr (str, '-');
4538 warning ("value of -mfixed-range must have form REG1-REG2");
4543 comma = strchr (dash + 1, ',');
4547 first = decode_reg_name (str);
4550 warning ("unknown register name: %s", str);
4554 last = decode_reg_name (dash + 1);
4557 warning ("unknown register name: %s", dash + 1);
4565 warning ("%s-%s is an empty range", str, dash + 1);
4569 for (i = first; i <= last; ++i)
4570 fixed_regs[i] = call_used_regs[i] = 1;
4580 static struct machine_function *
4581 ia64_init_machine_status (void)
4583 return ggc_alloc_cleared (sizeof (struct machine_function));
4586 /* Handle TARGET_OPTIONS switches. */
4589 ia64_override_options (void)
4593 const char *const name; /* processor name or nickname. */
4594 const enum processor_type processor;
4596 const processor_alias_table[] =
4598 {"itanium", PROCESSOR_ITANIUM},
4599 {"itanium1", PROCESSOR_ITANIUM},
4600 {"merced", PROCESSOR_ITANIUM},
4601 {"itanium2", PROCESSOR_ITANIUM2},
4602 {"mckinley", PROCESSOR_ITANIUM2},
4605 int const pta_size = ARRAY_SIZE (processor_alias_table);
4608 if (TARGET_AUTO_PIC)
4609 target_flags |= MASK_CONST_GP;
4611 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4613 warning ("cannot optimize floating point division for both latency and throughput");
4614 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4617 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4619 warning ("cannot optimize integer division for both latency and throughput");
4620 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4623 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4625 warning ("cannot optimize square root for both latency and throughput");
4626 target_flags &= ~MASK_INLINE_SQRT_THR;
4629 if (TARGET_INLINE_SQRT_LAT)
4631 warning ("not yet implemented: latency-optimized inline square root");
4632 target_flags &= ~MASK_INLINE_SQRT_LAT;
4635 if (ia64_fixed_range_string)
4636 fix_range (ia64_fixed_range_string);
4638 if (ia64_tls_size_string)
4641 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4642 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4643 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4645 ia64_tls_size = tmp;
4648 if (!ia64_tune_string)
4649 ia64_tune_string = "itanium2";
4651 for (i = 0; i < pta_size; i++)
4652 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4654 ia64_tune = processor_alias_table[i].processor;
error ("bad value (%s) for -mtune= switch", ia64_tune_string);
4661 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4662 flag_schedule_insns_after_reload = 0;
4664 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4666 init_machine_status = ia64_init_machine_status;
4669 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4670 static enum attr_type ia64_safe_type (rtx);
4672 static enum attr_itanium_class
4673 ia64_safe_itanium_class (rtx insn)
4675 if (recog_memoized (insn) >= 0)
4676 return get_attr_itanium_class (insn);
4678 return ITANIUM_CLASS_UNKNOWN;
4681 static enum attr_type
4682 ia64_safe_type (rtx insn)
4684 if (recog_memoized (insn) >= 0)
4685 return get_attr_type (insn);
4687 return TYPE_UNKNOWN;
/* The following collection of routines emits instruction group stop bits as
necessary to avoid dependencies. */
4693 /* Need to track some additional registers as far as serialization is
4694 concerned so we can properly handle br.call and br.ret. We could
4695 make these registers visible to gcc, but since these registers are
4696 never explicitly used in gcc generated code, it seems wasteful to
do so (plus it would make the call and return patterns needlessly
complex). */
4699 #define REG_GP (GR_REG (1))
4700 #define REG_RP (BR_REG (0))
4701 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4702 /* This is used for volatile asms which may require a stop bit immediately
4703 before and after them. */
4704 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4705 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4706 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
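/* The 64 extra entries starting at AR_UNAT_BIT_0 let each bit of
ar.unat be tracked as a separate "register"; see the AR_UNAT_REGNUM
handling in rtx_needs_barrier below. */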
4708 /* For each register, we keep track of how it has been written in the
4709 current instruction group.
4711 If a register is written unconditionally (no qualifying predicate),
4712 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4714 If a register is written if its qualifying predicate P is true, we
4715 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4716 may be written again by the complement of P (P^1) and when this happens,
4717 WRITE_COUNT gets set to 2.
4719 The result of this is that whenever an insn attempts to write a register
4720 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4722 If a predicate register is written by a floating-point insn, we set
4723 WRITTEN_BY_FP to true.
4725 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4726 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
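/* For example: a first write to r14 under predicate p6 records
WRITE_COUNT = 1 and FIRST_PRED = p6. A second write under p7 (its
complement, since predicates come in even/odd pairs) is still legal
and bumps WRITE_COUNT to 2; any further write to r14 in the same
instruction group then forces a stop bit. */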
4728 struct reg_write_state
4730 unsigned int write_count : 2;
4731 unsigned int first_pred : 16;
4732 unsigned int written_by_fp : 1;
4733 unsigned int written_by_and : 1;
4734 unsigned int written_by_or : 1;
4737 /* Cumulative info for the current instruction group. */
4738 struct reg_write_state rws_sum[NUM_REGS];
4739 /* Info for the current instruction. This gets copied to rws_sum after a
4740 stop bit is emitted. */
4741 struct reg_write_state rws_insn[NUM_REGS];
4743 /* Indicates whether this is the first instruction after a stop bit,
4744 in which case we don't need another stop bit. Without this, we hit
4745 the abort in ia64_variable_issue when scheduling an alloc. */
4746 static int first_instruction;
4748 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4749 RTL for one instruction. */
4752 unsigned int is_write : 1; /* Is register being written? */
4753 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4754 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4755 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4756 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4757 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4760 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4761 static int rws_access_regno (int, struct reg_flags, int);
4762 static int rws_access_reg (rtx, struct reg_flags, int);
4763 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4764 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4765 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4766 static void init_insn_group_barriers (void);
4767 static int group_barrier_needed_p (rtx);
4768 static int safe_group_barrier_needed_p (rtx);
4770 /* Update *RWS for REGNO, which is being written by the current instruction,
4771 with predicate PRED, and associated register flags in FLAGS. */
4774 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4777 rws[regno].write_count++;
4779 rws[regno].write_count = 2;
4780 rws[regno].written_by_fp |= flags.is_fp;
4781 /* ??? Not tracking and/or across differing predicates. */
4782 rws[regno].written_by_and = flags.is_and;
4783 rws[regno].written_by_or = flags.is_or;
4784 rws[regno].first_pred = pred;
4787 /* Handle an access to register REGNO of type FLAGS using predicate register
4788 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4789 a dependency with an earlier instruction in the same group. */
4792 rws_access_regno (int regno, struct reg_flags flags, int pred)
4794 int need_barrier = 0;
4796 if (regno >= NUM_REGS)
4799 if (! PR_REGNO_P (regno))
4800 flags.is_and = flags.is_or = 0;
4806 /* One insn writes same reg multiple times? */
4807 if (rws_insn[regno].write_count > 0)
4810 /* Update info for current instruction. */
4811 rws_update (rws_insn, regno, flags, pred);
4812 write_count = rws_sum[regno].write_count;
4814 switch (write_count)
4817 /* The register has not been written yet. */
4818 rws_update (rws_sum, regno, flags, pred);
4822 /* The register has been written via a predicate. If this is
4823 not a complementary predicate, then we need a barrier. */
4824 /* ??? This assumes that P and P+1 are always complementary
4825 predicates for P even. */
4826 if (flags.is_and && rws_sum[regno].written_by_and)
4828 else if (flags.is_or && rws_sum[regno].written_by_or)
4830 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4832 rws_update (rws_sum, regno, flags, pred);
4836 /* The register has been unconditionally written already. We
4838 if (flags.is_and && rws_sum[regno].written_by_and)
4840 else if (flags.is_or && rws_sum[regno].written_by_or)
4844 rws_sum[regno].written_by_and = flags.is_and;
4845 rws_sum[regno].written_by_or = flags.is_or;
4854 if (flags.is_branch)
/* Branches have several RAW exceptions that allow us to avoid
barriers. */
4859 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4860 /* RAW dependencies on branch regs are permissible as long
4861 as the writer is a non-branch instruction. Since we
4862 never generate code that uses a branch register written
4863 by a branch instruction, handling this case is
4867 if (REGNO_REG_CLASS (regno) == PR_REGS
4868 && ! rws_sum[regno].written_by_fp)
4869 /* The predicates of a branch are available within the
4870 same insn group as long as the predicate was written by
4871 something other than a floating-point instruction. */
4875 if (flags.is_and && rws_sum[regno].written_by_and)
4877 if (flags.is_or && rws_sum[regno].written_by_or)
4880 switch (rws_sum[regno].write_count)
4883 /* The register has not been written yet. */
4887 /* The register has been written via a predicate. If this is
4888 not a complementary predicate, then we need a barrier. */
4889 /* ??? This assumes that P and P+1 are always complementary
4890 predicates for P even. */
4891 if ((rws_sum[regno].first_pred ^ 1) != pred)
4896 /* The register has been unconditionally written already. We
4906 return need_barrier;
4910 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4912 int regno = REGNO (reg);
4913 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4916 return rws_access_regno (regno, flags, pred);
4919 int need_barrier = 0;
4921 need_barrier |= rws_access_regno (regno + n, flags, pred);
4922 return need_barrier;
4926 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4927 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4930 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4932 rtx src = SET_SRC (x);
4936 switch (GET_CODE (src))
4942 if (SET_DEST (x) == pc_rtx)
4943 /* X is a conditional branch. */
4947 int is_complemented = 0;
4949 /* X is a conditional move. */
4950 rtx cond = XEXP (src, 0);
4951 if (GET_CODE (cond) == EQ)
4952 is_complemented = 1;
4953 cond = XEXP (cond, 0);
4954 if (GET_CODE (cond) != REG
4955 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4958 if (XEXP (src, 1) == SET_DEST (x)
4959 || XEXP (src, 2) == SET_DEST (x))
4961 /* X is a conditional move that conditionally writes the
4964 /* We need another complement in this case. */
4965 if (XEXP (src, 1) == SET_DEST (x))
4966 is_complemented = ! is_complemented;
4968 *ppred = REGNO (cond);
4969 if (is_complemented)
4973 /* ??? If this is a conditional write to the dest, then this
4974 instruction does not actually read one source. This probably
4975 doesn't matter, because that source is also the dest. */
4976 /* ??? Multiple writes to predicate registers are allowed
4977 if they are all AND type compares, or if they are all OR
type compares. We do not generate such instructions
currently. */
4981 /* ... fall through ... */
4984 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4985 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4986 /* Set pflags->is_fp to 1 so that we know we're dealing
4987 with a floating point comparison when processing the
4988 destination of the SET. */
4991 /* Discover if this is a parallel comparison. We only handle
4992 and.orcm and or.andcm at present, since we must retain a
4993 strict inverse on the predicate pair. */
4994 else if (GET_CODE (src) == AND)
4996 else if (GET_CODE (src) == IOR)
5003 /* Subroutine of rtx_needs_barrier; this function determines whether the
5004 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5005 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5009 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5011 int need_barrier = 0;
5013 rtx src = SET_SRC (x);
5015 if (GET_CODE (src) == CALL)
5016 /* We don't need to worry about the result registers that
get written by a subroutine call. */
5018 return rtx_needs_barrier (src, flags, pred);
5019 else if (SET_DEST (x) == pc_rtx)
5021 /* X is a conditional branch. */
5022 /* ??? This seems redundant, as the caller sets this bit for
5024 flags.is_branch = 1;
5025 return rtx_needs_barrier (src, flags, pred);
5028 need_barrier = rtx_needs_barrier (src, flags, pred);
5030 /* This instruction unconditionally uses a predicate register. */
5032 need_barrier |= rws_access_reg (cond, flags, 0);
5035 if (GET_CODE (dst) == ZERO_EXTRACT)
5037 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5038 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5039 dst = XEXP (dst, 0);
5041 return need_barrier;
5044 /* Handle an access to rtx X of type FLAGS using predicate register
5045 PRED. Return 1 if this access creates a dependency with an earlier
5046 instruction in the same group. */
5049 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5052 int is_complemented = 0;
5053 int need_barrier = 0;
5054 const char *format_ptr;
5055 struct reg_flags new_flags;
5063 switch (GET_CODE (x))
5066 update_set_flags (x, &new_flags, &pred, &cond);
5067 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5068 if (GET_CODE (SET_SRC (x)) != CALL)
5070 new_flags.is_write = 1;
5071 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5076 new_flags.is_write = 0;
5077 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5079 /* Avoid multiple register writes, in case this is a pattern with
5080 multiple CALL rtx. This avoids an abort in rws_access_reg. */
5081 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5083 new_flags.is_write = 1;
5084 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5085 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5086 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5091 /* X is a predicated instruction. */
5093 cond = COND_EXEC_TEST (x);
5096 need_barrier = rtx_needs_barrier (cond, flags, 0);
5098 if (GET_CODE (cond) == EQ)
5099 is_complemented = 1;
5100 cond = XEXP (cond, 0);
5101 if (GET_CODE (cond) != REG
5102 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5104 pred = REGNO (cond);
5105 if (is_complemented)
5108 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5109 return need_barrier;
5113 /* Clobber & use are for earlier compiler-phases only. */
5118 /* We always emit stop bits for traditional asms. We emit stop bits
5119 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5120 if (GET_CODE (x) != ASM_OPERANDS
5121 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5123 /* Avoid writing the register multiple times if we have multiple
5124 asm outputs. This avoids an abort in rws_access_reg. */
5125 if (! rws_insn[REG_VOLATILE].write_count)
5127 new_flags.is_write = 1;
5128 rws_access_regno (REG_VOLATILE, new_flags, pred);
/* For all ASM_OPERANDS, we must traverse the vector of input operands.
We cannot just fall through here, since then we would be confused
by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
a traditional asm, unlike its normal usage. */
5138 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5139 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5144 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5146 rtx pat = XVECEXP (x, 0, i);
5147 if (GET_CODE (pat) == SET)
5149 update_set_flags (pat, &new_flags, &pred, &cond);
5150 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5152 else if (GET_CODE (pat) == USE
5153 || GET_CODE (pat) == CALL
5154 || GET_CODE (pat) == ASM_OPERANDS)
5155 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5156 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5159 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5161 rtx pat = XVECEXP (x, 0, i);
5162 if (GET_CODE (pat) == SET)
5164 if (GET_CODE (SET_SRC (pat)) != CALL)
5166 new_flags.is_write = 1;
5167 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5171 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5172 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5180 if (REGNO (x) == AR_UNAT_REGNUM)
5182 for (i = 0; i < 64; ++i)
5183 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5186 need_barrier = rws_access_reg (x, flags, pred);
5190 /* Find the regs used in memory address computation. */
5191 new_flags.is_write = 0;
5192 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5195 case CONST_INT: case CONST_DOUBLE:
5196 case SYMBOL_REF: case LABEL_REF: case CONST:
5199 /* Operators with side-effects. */
5200 case POST_INC: case POST_DEC:
5201 if (GET_CODE (XEXP (x, 0)) != REG)
5204 new_flags.is_write = 0;
5205 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5206 new_flags.is_write = 1;
5207 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5211 if (GET_CODE (XEXP (x, 0)) != REG)
5214 new_flags.is_write = 0;
5215 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5216 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5217 new_flags.is_write = 1;
5218 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5221 /* Handle common unary and binary ops for efficiency. */
5222 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5223 case MOD: case UDIV: case UMOD: case AND: case IOR:
5224 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5225 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5226 case NE: case EQ: case GE: case GT: case LE:
5227 case LT: case GEU: case GTU: case LEU: case LTU:
5228 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5229 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5232 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5233 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5234 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5235 case SQRT: case FFS: case POPCOUNT:
5236 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5240 switch (XINT (x, 1))
5242 case UNSPEC_LTOFF_DTPMOD:
5243 case UNSPEC_LTOFF_DTPREL:
5245 case UNSPEC_LTOFF_TPREL:
5247 case UNSPEC_PRED_REL_MUTEX:
5248 case UNSPEC_PIC_CALL:
5250 case UNSPEC_FETCHADD_ACQ:
5251 case UNSPEC_BSP_VALUE:
5252 case UNSPEC_FLUSHRS:
5253 case UNSPEC_BUNDLE_SELECTOR:
5256 case UNSPEC_GR_SPILL:
5257 case UNSPEC_GR_RESTORE:
5259 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5260 HOST_WIDE_INT bit = (offset >> 3) & 63;
5262 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5264 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5269 case UNSPEC_FR_SPILL:
5270 case UNSPEC_FR_RESTORE:
5271 case UNSPEC_GETF_EXP:
5272 case UNSPEC_SETF_EXP:
5274 case UNSPEC_FR_SQRT_RECIP_APPROX:
5275 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5278 case UNSPEC_FR_RECIP_APPROX:
5279 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5280 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5283 case UNSPEC_CMPXCHG_ACQ:
5284 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5285 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5293 case UNSPEC_VOLATILE:
5294 switch (XINT (x, 1))
5297 /* Alloc must always be the first instruction of a group.
5298 We force this by always returning true. */
5299 /* ??? We might get better scheduling if we explicitly check for
5300 input/local/output register dependencies, and modify the
5301 scheduler so that alloc is always reordered to the start of
5302 the current group. We could then eliminate all of the
5303 first_instruction code. */
5304 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5306 new_flags.is_write = 1;
5307 rws_access_regno (REG_AR_CFM, new_flags, pred);
5310 case UNSPECV_SET_BSP:
5314 case UNSPECV_BLOCKAGE:
5315 case UNSPECV_INSN_GROUP_BARRIER:
5317 case UNSPECV_PSAC_ALL:
5318 case UNSPECV_PSAC_NORMAL:
5327 new_flags.is_write = 0;
5328 need_barrier = rws_access_regno (REG_RP, flags, pred);
5329 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5331 new_flags.is_write = 1;
5332 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5333 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5337 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5338 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5339 switch (format_ptr[i])
5341 case '0': /* unused field */
5342 case 'i': /* integer */
5343 case 'n': /* note */
5344 case 'w': /* wide integer */
5345 case 's': /* pointer to string */
5346 case 'S': /* optional pointer to string */
5350 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5355 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5356 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5365 return need_barrier;
5368 /* Clear out the state for group_barrier_needed_p at the start of a
5369 sequence of insns. */
5372 init_insn_group_barriers (void)
5374 memset (rws_sum, 0, sizeof (rws_sum));
5375 first_instruction = 1;
5378 /* Given the current state, recorded by previous calls to this function,
5379 determine whether a group barrier (a stop bit) is necessary before INSN.
5380 Return nonzero if so. */
5383 group_barrier_needed_p (rtx insn)
5386 int need_barrier = 0;
5387 struct reg_flags flags;
5389 memset (&flags, 0, sizeof (flags));
5390 switch (GET_CODE (insn))
5396 /* A barrier doesn't imply an instruction group boundary. */
5400 memset (rws_insn, 0, sizeof (rws_insn));
5404 flags.is_branch = 1;
5405 flags.is_sibcall = SIBLING_CALL_P (insn);
5406 memset (rws_insn, 0, sizeof (rws_insn));
5408 /* Don't bundle a call following another call. */
5409 if ((pat = prev_active_insn (insn))
5410 && GET_CODE (pat) == CALL_INSN)
5416 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5420 flags.is_branch = 1;
5422 /* Don't bundle a jump following a call. */
5423 if ((pat = prev_active_insn (insn))
5424 && GET_CODE (pat) == CALL_INSN)
5432 if (GET_CODE (PATTERN (insn)) == USE
5433 || GET_CODE (PATTERN (insn)) == CLOBBER)
5434 /* Don't care about USE and CLOBBER "insns"---those are used to
5435 indicate to the optimizer that it shouldn't get rid of
5436 certain operations. */
5439 pat = PATTERN (insn);
5441 /* Ug. Hack hacks hacked elsewhere. */
5442 switch (recog_memoized (insn))
5444 /* We play dependency tricks with the epilogue in order
5445 to get proper schedules. Undo this for dv analysis. */
5446 case CODE_FOR_epilogue_deallocate_stack:
5447 case CODE_FOR_prologue_allocate_stack:
5448 pat = XVECEXP (pat, 0, 0);
5451 /* The pattern we use for br.cloop confuses the code above.
5452 The second element of the vector is representative. */
5453 case CODE_FOR_doloop_end_internal:
5454 pat = XVECEXP (pat, 0, 1);
5457 /* Doesn't generate code. */
5458 case CODE_FOR_pred_rel_mutex:
5459 case CODE_FOR_prologue_use:
5466 memset (rws_insn, 0, sizeof (rws_insn));
5467 need_barrier = rtx_needs_barrier (pat, flags, 0);
/* Check to see if the previous instruction was a volatile
asm. */
5472 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5479 if (first_instruction && INSN_P (insn)
5480 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5481 && GET_CODE (PATTERN (insn)) != USE
5482 && GET_CODE (PATTERN (insn)) != CLOBBER)
5485 first_instruction = 0;
5488 return need_barrier;
5491 /* Like group_barrier_needed_p, but do not clobber the current state. */
5494 safe_group_barrier_needed_p (rtx insn)
5496 struct reg_write_state rws_saved[NUM_REGS];
5497 int saved_first_instruction;
5500 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5501 saved_first_instruction = first_instruction;
5503 t = group_barrier_needed_p (insn);
5505 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5506 first_instruction = saved_first_instruction;
5511 /* Scan the current function and insert stop bits as necessary to
5512 eliminate dependencies. This function assumes that a final
5513 instruction scheduling pass has been run which has already
5514 inserted most of the necessary stop bits. This function only
5515 inserts new ones at basic block boundaries, since these are
5516 invisible to the scheduler. */
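/* A stop bit appears as the ";;" marker in the emitted assembly; in
the RTL stream it is the insn created by
gen_insn_group_barrier (GEN_INT (3)) below. */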
5519 emit_insn_group_barriers (FILE *dump)
5523 int insns_since_last_label = 0;
5525 init_insn_group_barriers ();
5527 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5529 if (GET_CODE (insn) == CODE_LABEL)
5531 if (insns_since_last_label)
5533 insns_since_last_label = 0;
5535 else if (GET_CODE (insn) == NOTE
5536 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5538 if (insns_since_last_label)
5540 insns_since_last_label = 0;
5542 else if (GET_CODE (insn) == INSN
5543 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5544 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5546 init_insn_group_barriers ();
5549 else if (INSN_P (insn))
5551 insns_since_last_label = 1;
5553 if (group_barrier_needed_p (insn))
5558 fprintf (dump, "Emitting stop before label %d\n",
5559 INSN_UID (last_label));
5560 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5563 init_insn_group_barriers ();
5571 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5572 This function has to emit all necessary group barriers. */
5575 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5579 init_insn_group_barriers ();
5581 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5583 if (GET_CODE (insn) == BARRIER)
5585 rtx last = prev_active_insn (insn);
5589 if (GET_CODE (last) == JUMP_INSN
5590 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5591 last = prev_active_insn (last);
5592 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5593 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5595 init_insn_group_barriers ();
5597 else if (INSN_P (insn))
5599 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5600 init_insn_group_barriers ();
5601 else if (group_barrier_needed_p (insn))
5603 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5604 init_insn_group_barriers ();
5605 group_barrier_needed_p (insn);
5612 static int errata_find_address_regs (rtx *, void *);
5613 static void errata_emit_nops (rtx);
5614 static void fixup_errata (void);
/* This structure is used to track some details about the previous insn
groups so we can determine if it may be necessary to insert NOPs to
work around hardware errata. */
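/* The fix applied in errata_emit_nops below is to insert a nop
bracketed by two stop bits immediately before an insn that would use
a conditionally-set GR as an address register in the following
group. */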
5621 HARD_REG_SET p_reg_set;
5622 HARD_REG_SET gr_reg_conditionally_set;
5625 /* Index into the last_group array. */
5626 static int group_idx;
5628 /* Called through for_each_rtx; determines if a hard register that was
5629 conditionally set in the previous group is used as an address register.
5630 It ensures that for_each_rtx returns 1 in that case. */
5632 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5635 if (GET_CODE (x) != MEM)
5638 if (GET_CODE (x) == POST_MODIFY)
5640 if (GET_CODE (x) == REG)
5642 struct group *prev_group = last_group + (group_idx ^ 1);
5643 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5651 /* Called for each insn; this function keeps track of the state in
5652 last_group and emits additional NOPs if necessary to work around
5653 an Itanium A/B step erratum. */
5655 errata_emit_nops (rtx insn)
5657 struct group *this_group = last_group + group_idx;
5658 struct group *prev_group = last_group + (group_idx ^ 1);
5659 rtx pat = PATTERN (insn);
5660 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5661 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5662 enum attr_type type;
5665 if (GET_CODE (real_pat) == USE
5666 || GET_CODE (real_pat) == CLOBBER
5667 || GET_CODE (real_pat) == ASM_INPUT
5668 || GET_CODE (real_pat) == ADDR_VEC
5669 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5670 || asm_noperands (PATTERN (insn)) >= 0)
5673 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5676 if (GET_CODE (set) == PARALLEL)
5679 set = XVECEXP (real_pat, 0, 0);
5680 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5681 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5682 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5689 if (set && GET_CODE (set) != SET)
5692 type = get_attr_type (insn);
5695 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5696 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5698 if ((type == TYPE_M || type == TYPE_A) && cond && set
5699 && REG_P (SET_DEST (set))
5700 && GET_CODE (SET_SRC (set)) != PLUS
5701 && GET_CODE (SET_SRC (set)) != MINUS
5702 && (GET_CODE (SET_SRC (set)) != ASHIFT
5703 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5704 && (GET_CODE (SET_SRC (set)) != MEM
5705 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5706 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5708 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5709 || ! REG_P (XEXP (cond, 0)))
5712 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5713 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5715 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5717 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5718 emit_insn_before (gen_nop (), insn);
5719 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5721 memset (last_group, 0, sizeof last_group);
5725 /* Emit extra nops if they are required to work around hardware errata. */
5732 if (! TARGET_B_STEP)
5736 memset (last_group, 0, sizeof last_group);
5738 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5743 if (ia64_safe_type (insn) == TYPE_S)
5746 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5749 errata_emit_nops (insn);
5754 /* Instruction scheduling support. */
5756 #define NR_BUNDLES 10
5758 /* A list of names of all available bundles. */
5760 static const char *bundle_name [NR_BUNDLES] =
5766 #if NR_BUNDLES == 10
5776 /* Nonzero if we should insert stop bits into the schedule. */
5778 int ia64_final_schedule = 0;
/* Codes of the corresponding queried units: */
5782 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5783 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5785 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5786 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5788 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5790 /* The following variable value is an insn group barrier. */
5792 static rtx dfa_stop_insn;
5794 /* The following variable value is the last issued insn. */
5796 static rtx last_scheduled_insn;
/* The following variable value is the size of the DFA state. */
5800 static size_t dfa_state_size;
/* The following variable value is a pointer to a DFA state used as
a temporary variable. */
5805 static state_t temp_dfa_state = NULL;
/* The following variable value is the DFA state after issuing the last
5810 static state_t prev_cycle_state = NULL;
/* The following array element values are TRUE if the corresponding
insn requires stop bits to be added before it. */
5815 static char *stops_p;
/* The following variable is used to set up the array mentioned above. */
5819 static int stop_before_p = 0;
/* The following variable value is the length of the arrays `clocks' and
`add_cycles'. */
5824 static int clocks_length;
/* The following array element values are cycles on which the
corresponding insn will be issued. The array is used only for
Itanium1. */

static int *clocks;
/* The following array element values are the numbers of cycles that
should be added to improve insn scheduling of MM insns for Itanium1. */
5835 static int *add_cycles;
5837 static rtx ia64_single_set (rtx);
5838 static void ia64_emit_insn_before (rtx, rtx);
5840 /* Map a bundle number to its pseudo-op. */
5843 get_bundle_name (int b)
5845 return bundle_name[b];
5849 /* Return the maximum number of instructions a cpu can issue. */
5852 ia64_issue_rate (void)
/* Helper function - like single_set, but looks inside COND_EXEC. */
5860 ia64_single_set (rtx insn)
5862 rtx x = PATTERN (insn), ret;
5863 if (GET_CODE (x) == COND_EXEC)
5864 x = COND_EXEC_CODE (x);
5865 if (GET_CODE (x) == SET)
/* Special case here: prologue_allocate_stack and epilogue_deallocate_stack.
Although they are not classical single sets, the second set is there just
to protect it from moving past FP-relative stack accesses. */
5871 switch (recog_memoized (insn))
5873 case CODE_FOR_prologue_allocate_stack:
5874 case CODE_FOR_epilogue_deallocate_stack:
5875 ret = XVECEXP (x, 0, 0);
5879 ret = single_set_2 (insn, x);
5886 /* Adjust the cost of a scheduling dependency. Return the new cost of
5887 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5890 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5892 enum attr_itanium_class dep_class;
5893 enum attr_itanium_class insn_class;
5895 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5898 insn_class = ia64_safe_itanium_class (insn);
5899 dep_class = ia64_safe_itanium_class (dep_insn);
5900 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5901 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5907 /* Like emit_insn_before, but skip cycle_display notes.
5908 ??? When cycle display notes are implemented, update this. */
5911 ia64_emit_insn_before (rtx insn, rtx before)
5913 emit_insn_before (insn, before);
/* The following function marks insns that produce addresses for load
and store insns. Such insns will be placed into M slots because this
decreases the latency time for Itanium1 (see the function
`ia64_produce_address_p' and the DFA descriptions). */
5922 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5924 rtx insn, link, next, next_tail;
5926 next_tail = NEXT_INSN (tail);
5927 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5930 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5932 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5934 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5936 next = XEXP (link, 0);
5937 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5938 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5939 && ia64_st_address_bypass_p (insn, next))
5941 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5942 || ia64_safe_itanium_class (next)
5943 == ITANIUM_CLASS_FLD)
5944 && ia64_ld_address_bypass_p (insn, next))
5947 insn->call = link != 0;
5951 /* We're beginning a new block. Initialize data structures as necessary. */
5954 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5955 int sched_verbose ATTRIBUTE_UNUSED,
5956 int max_ready ATTRIBUTE_UNUSED)
5958 #ifdef ENABLE_CHECKING
5961 if (reload_completed)
5962 for (insn = NEXT_INSN (current_sched_info->prev_head);
5963 insn != current_sched_info->next_tail;
5964 insn = NEXT_INSN (insn))
5965 if (SCHED_GROUP_P (insn))
5968 last_scheduled_insn = NULL_RTX;
5969 init_insn_group_barriers ();
/* We are about to begin issuing insns for this clock cycle.
5973 Override the default sort algorithm to better slot instructions. */
5976 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5977 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5981 int n_ready = *pn_ready;
5982 rtx *e_ready = ready + n_ready;
5986 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5988 if (reorder_type == 0)
5990 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5992 for (insnp = ready; insnp < e_ready; insnp++)
5993 if (insnp < e_ready)
5996 enum attr_type t = ia64_safe_type (insn);
5997 if (t == TYPE_UNKNOWN)
5999 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6000 || asm_noperands (PATTERN (insn)) >= 0)
6002 rtx lowest = ready[n_asms];
6003 ready[n_asms] = insn;
6009 rtx highest = ready[n_ready - 1];
6010 ready[n_ready - 1] = insn;
6017 if (n_asms < n_ready)
6019 /* Some normal insns to process. Skip the asms. */
6023 else if (n_ready > 0)
6027 if (ia64_final_schedule)
6030 int nr_need_stop = 0;
6032 for (insnp = ready; insnp < e_ready; insnp++)
6033 if (safe_group_barrier_needed_p (*insnp))
6036 if (reorder_type == 1 && n_ready == nr_need_stop)
6038 if (reorder_type == 0)
/* Move down everything that needs a stop bit, preserving
the relative order. */
6043 while (insnp-- > ready + deleted)
6044 while (insnp >= ready + deleted)
6047 if (! safe_group_barrier_needed_p (insn))
6049 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
/* We are about to begin issuing insns for this clock cycle. Override
6061 the default sort algorithm to better slot instructions. */
6064 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6067 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6068 pn_ready, clock_var, 0);
6071 /* Like ia64_sched_reorder, but called after issuing each insn.
6072 Override the default sort algorithm to better slot instructions. */
6075 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6076 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6077 int *pn_ready, int clock_var)
6079 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6080 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6081 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6085 /* We are about to issue INSN. Return the number of insns left on the
6086 ready queue that can be issued this cycle. */
6089 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6090 int sched_verbose ATTRIBUTE_UNUSED,
6091 rtx insn ATTRIBUTE_UNUSED,
6092 int can_issue_more ATTRIBUTE_UNUSED)
6094 last_scheduled_insn = insn;
6095 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6096 if (reload_completed)
6098 if (group_barrier_needed_p (insn))
6100 if (GET_CODE (insn) == CALL_INSN)
6101 init_insn_group_barriers ();
6102 stops_p [INSN_UID (insn)] = stop_before_p;
/* We are choosing an insn from the ready queue. Return nonzero if INSN
can be chosen. */
6112 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6114 if (insn == NULL_RTX || !INSN_P (insn))
6116 return (!reload_completed
6117 || !safe_group_barrier_needed_p (insn));
/* The following variable value is a pseudo-insn used by the DFA insn
scheduler to change the DFA state when the simulated clock is
increased. */
6124 static rtx dfa_pre_cycle_insn;
/* We are about to begin issuing INSN. Return nonzero if we cannot
issue it on the given cycle CLOCK, and return zero if we should not
sort the ready queue on the next clock start. */
6131 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6132 int clock, int *sort_p)
6134 int setup_clocks_p = FALSE;
6136 if (insn == NULL_RTX || !INSN_P (insn))
6138 if ((reload_completed && safe_group_barrier_needed_p (insn))
6139 || (last_scheduled_insn
6140 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6141 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6142 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6144 init_insn_group_barriers ();
6145 if (verbose && dump)
6146 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6147 last_clock == clock ? " + cycle advance" : "");
6149 if (last_clock == clock)
6151 state_transition (curr_state, dfa_stop_insn);
6152 if (TARGET_EARLY_STOP_BITS)
6153 *sort_p = (last_scheduled_insn == NULL_RTX
6154 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6159 else if (reload_completed)
6160 setup_clocks_p = TRUE;
6161 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6162 state_transition (curr_state, dfa_stop_insn);
6163 state_transition (curr_state, dfa_pre_cycle_insn);
6164 state_transition (curr_state, NULL);
6166 else if (reload_completed)
6167 setup_clocks_p = TRUE;
6168 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6170 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6172 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6177 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6178 if (REG_NOTE_KIND (link) == 0)
6180 enum attr_itanium_class dep_class;
6181 rtx dep_insn = XEXP (link, 0);
6183 dep_class = ia64_safe_itanium_class (dep_insn);
6184 if ((dep_class == ITANIUM_CLASS_MMMUL
6185 || dep_class == ITANIUM_CLASS_MMSHF)
6186 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6188 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6189 d = last_clock - clocks [INSN_UID (dep_insn)];
6192 add_cycles [INSN_UID (insn)] = 3 - d;
6200 /* The following page contains abstract data `bundle states' which are
6201 used for bundling insns (inserting nops and template generation). */
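/* Background: an IA-64 bundle packs three instruction slots together
with a template that selects the unit pattern (.mii, .mmi, .mfb, and
so on; cf. bundle_name above). An L-unit insn such as movl occupies
two of the three slots, which is why it is counted as 2 insns in the
states below. */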
/* The following describes the state of insn bundling. */
/* Unique bundle state number to identify them in the debugging
output. */
int unique_num;
6210 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
/* number of nops before and after the insn */
6212 short before_nops_num, after_nops_num;
int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
insn) */
6215 int cost; /* cost of the state in cycles */
6216 int accumulated_insns_num; /* number of all previous insns including
nops. An L-unit insn is counted as 2 insns */
6218 int branch_deviation; /* deviation of previous branches from 3rd slots */
6219 struct bundle_state *next; /* next state with the same insn_num */
6220 struct bundle_state *originator; /* originator (previous insn state) */
6221 /* All bundle states are in the following chain. */
6222 struct bundle_state *allocated_states_chain;
6223 /* The DFA State after issuing the insn and the nops. */
/* The following array maps an insn number to the corresponding bundle
state. */
6229 static struct bundle_state **index_to_bundle_states;
/* The unique number of the next bundle state. */
6233 static int bundle_states_num;
6235 /* All allocated bundle states are in the following chain. */
6237 static struct bundle_state *allocated_bundle_states_chain;
/* All allocated but not used bundle states are in the following
chain. */
6242 static struct bundle_state *free_bundle_state_chain;
6245 /* The following function returns a free bundle state. */
6247 static struct bundle_state *
6248 get_free_bundle_state (void)
6250 struct bundle_state *result;
6252 if (free_bundle_state_chain != NULL)
6254 result = free_bundle_state_chain;
6255 free_bundle_state_chain = result->next;
6259 result = xmalloc (sizeof (struct bundle_state));
6260 result->dfa_state = xmalloc (dfa_state_size);
6261 result->allocated_states_chain = allocated_bundle_states_chain;
6262 allocated_bundle_states_chain = result;
6264 result->unique_num = bundle_states_num++;
/* The following function frees the given bundle state. */
6272 free_bundle_state (struct bundle_state *state)
6274 state->next = free_bundle_state_chain;
6275 free_bundle_state_chain = state;
6278 /* Start work with abstract data `bundle states'. */
6281 initiate_bundle_states (void)
6283 bundle_states_num = 0;
6284 free_bundle_state_chain = NULL;
6285 allocated_bundle_states_chain = NULL;
6288 /* Finish work with abstract data `bundle states'. */
6291 finish_bundle_states (void)
6293 struct bundle_state *curr_state, *next_state;
6295 for (curr_state = allocated_bundle_states_chain;
6297 curr_state = next_state)
6299 next_state = curr_state->allocated_states_chain;
6300 free (curr_state->dfa_state);
6305 /* Hash table of the bundle states. The key is dfa_state and insn_num
6306 of the bundle states. */
6308 static htab_t bundle_state_table;
/* The function returns the hash of BUNDLE_STATE. */
6313 bundle_state_hash (const void *bundle_state)
6315 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6318 for (result = i = 0; i < dfa_state_size; i++)
6319 result += (((unsigned char *) state->dfa_state) [i]
6320 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6321 return result + state->insn_num;
6324 /* The function returns nonzero if the bundle state keys are equal. */
6327 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6329 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6330 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6332 return (state1->insn_num == state2->insn_num
6333 && memcmp (state1->dfa_state, state2->dfa_state,
6334 dfa_state_size) == 0);
6337 /* The function inserts the BUNDLE_STATE into the hash table. The
6338 function returns nonzero if the bundle has been inserted into the
table. The table contains the best bundle state with the given key. */
6342 insert_bundle_state (struct bundle_state *bundle_state)
6346 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6347 if (*entry_ptr == NULL)
6349 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6350 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6351 *entry_ptr = (void *) bundle_state;
6354 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6355 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6356 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6357 > bundle_state->accumulated_insns_num
6358 || (((struct bundle_state *)
6359 *entry_ptr)->accumulated_insns_num
6360 == bundle_state->accumulated_insns_num
6361 && ((struct bundle_state *)
6362 *entry_ptr)->branch_deviation
6363 > bundle_state->branch_deviation))))
6366 struct bundle_state temp;
6368 temp = *(struct bundle_state *) *entry_ptr;
6369 *(struct bundle_state *) *entry_ptr = *bundle_state;
6370 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6371 *bundle_state = temp;
6376 /* Start work with the hash table. */
6379 initiate_bundle_state_table (void)
6381 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6385 /* Finish work with the hash table. */
6388 finish_bundle_state_table (void)
6390 htab_delete (bundle_state_table);
/* The following variable is an insn `nop' used to check bundle states
with different numbers of inserted nops. */
6398 static rtx ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
state without advancing the processor cycle. If it fails, the
function returns FALSE and frees the current state. */
6405 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6409 for (i = 0; i < nops_num; i++)
6410 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6412 free_bundle_state (curr_state);
/* The following function tries to issue INSN for the current
state without advancing the processor cycle. If it fails, the
function returns FALSE and frees the current state. */
6423 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6425 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6427 free_bundle_state (curr_state);
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
starting with ORIGINATOR without advancing the processor cycle. If
TRY_BUNDLE_END_P is TRUE, the function also/only (if
ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
bundle. If this succeeds, the function creates a new bundle state,
inserts it into the hash table, and records it in
`index_to_bundle_states'. */
6441 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6442 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6444 struct bundle_state *curr_state;
6446 curr_state = get_free_bundle_state ();
6447 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6448 curr_state->insn = insn;
6449 curr_state->insn_num = originator->insn_num + 1;
6450 curr_state->cost = originator->cost;
6451 curr_state->originator = originator;
6452 curr_state->before_nops_num = before_nops_num;
6453 curr_state->after_nops_num = 0;
6454 curr_state->accumulated_insns_num
6455 = originator->accumulated_insns_num + before_nops_num;
6456 curr_state->branch_deviation = originator->branch_deviation;
6457 if (insn == NULL_RTX)
6459 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6461 if (GET_MODE (insn) == TImode)
6463 if (!try_issue_nops (curr_state, before_nops_num))
6465 if (!try_issue_insn (curr_state, insn))
6467 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6468 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6469 && curr_state->accumulated_insns_num % 3 != 0)
6471 free_bundle_state (curr_state);
6475 else if (GET_MODE (insn) != TImode)
6477 if (!try_issue_nops (curr_state, before_nops_num))
6479 if (!try_issue_insn (curr_state, insn))
6481 curr_state->accumulated_insns_num++;
6482 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6483 || asm_noperands (PATTERN (insn)) >= 0)
6485 if (ia64_safe_type (insn) == TYPE_L)
6486 curr_state->accumulated_insns_num++;
6490 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6491 state_transition (curr_state->dfa_state, NULL);
6493 if (!try_issue_nops (curr_state, before_nops_num))
6495 if (!try_issue_insn (curr_state, insn))
6497 curr_state->accumulated_insns_num++;
6498 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6499 || asm_noperands (PATTERN (insn)) >= 0)
6501 /* Finish bundle containing asm insn. */
6502 curr_state->after_nops_num
6503 = 3 - curr_state->accumulated_insns_num % 3;
6504 curr_state->accumulated_insns_num
6505 += 3 - curr_state->accumulated_insns_num % 3;
6507 else if (ia64_safe_type (insn) == TYPE_L)
6508 curr_state->accumulated_insns_num++;
6510 if (ia64_safe_type (insn) == TYPE_B)
6511 curr_state->branch_deviation
6512 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6513 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6515 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6518 struct bundle_state *curr_state1;
6519 struct bundle_state *allocated_states_chain;
6521 curr_state1 = get_free_bundle_state ();
6522 dfa_state = curr_state1->dfa_state;
6523 allocated_states_chain = curr_state1->allocated_states_chain;
6524 *curr_state1 = *curr_state;
6525 curr_state1->dfa_state = dfa_state;
6526 curr_state1->allocated_states_chain = allocated_states_chain;
6527 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6529 curr_state = curr_state1;
6531 if (!try_issue_nops (curr_state,
6532 3 - curr_state->accumulated_insns_num % 3))
6534 curr_state->after_nops_num
6535 = 3 - curr_state->accumulated_insns_num % 3;
6536 curr_state->accumulated_insns_num
6537 += 3 - curr_state->accumulated_insns_num % 3;
6539 if (!insert_bundle_state (curr_state))
6540 free_bundle_state (curr_state);
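/* Illustrative aside, not part of the original source: the bundle-fill
   arithmetic used above.  With N insns accumulated, N % 3 is the slot
   inside the current bundle, and 3 - N % 3 nops close the bundle out
   (the code only uses this when N % 3 != 0).  A self-contained,
   runnable check:  */
#if 0
#include <assert.h>

static int
toy_pad_to_bundle_end (int accumulated_insns_num)
{
  return 3 - accumulated_insns_num % 3;
}

int
main (void)
{
  assert (toy_pad_to_bundle_end (1) == 2);  /* one slot used -> 2 nops */
  assert (toy_pad_to_bundle_end (5) == 1);  /* two slots used -> 1 nop */
  return 0;
}
#endif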
/* The following function returns the current position in the
   two-bundle issue window for STATE.  */
6548 get_max_pos (state_t state)
6550 if (cpu_unit_reservation_p (state, pos_6))
6552 else if (cpu_unit_reservation_p (state, pos_5))
6554 else if (cpu_unit_reservation_p (state, pos_4))
6556 else if (cpu_unit_reservation_p (state, pos_3))
6558 else if (cpu_unit_reservation_p (state, pos_2))
6560 else if (cpu_unit_reservation_p (state, pos_1))
/* The function returns the code of a possible template for the given
   position and state.  It should be called only with position values
   of 3 or 6.  */
6571 get_template (state_t state, int pos)
6576 if (cpu_unit_reservation_p (state, _0mii_))
6578 else if (cpu_unit_reservation_p (state, _0mmi_))
6580 else if (cpu_unit_reservation_p (state, _0mfi_))
6582 else if (cpu_unit_reservation_p (state, _0mmf_))
6584 else if (cpu_unit_reservation_p (state, _0bbb_))
6586 else if (cpu_unit_reservation_p (state, _0mbb_))
6588 else if (cpu_unit_reservation_p (state, _0mib_))
6590 else if (cpu_unit_reservation_p (state, _0mmb_))
6592 else if (cpu_unit_reservation_p (state, _0mfb_))
6594 else if (cpu_unit_reservation_p (state, _0mlx_))
6599 if (cpu_unit_reservation_p (state, _1mii_))
6601 else if (cpu_unit_reservation_p (state, _1mmi_))
6603 else if (cpu_unit_reservation_p (state, _1mfi_))
6605 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6607 else if (cpu_unit_reservation_p (state, _1bbb_))
6609 else if (cpu_unit_reservation_p (state, _1mbb_))
6611 else if (cpu_unit_reservation_p (state, _1mib_))
6613 else if (cpu_unit_reservation_p (state, _1mmb_))
6615 else if (cpu_unit_reservation_p (state, _1mfb_))
6617 else if (cpu_unit_reservation_p (state, _1mlx_))
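/* Illustrative aside, not part of the original source: the template
   codes returned above follow the order of the if-chains.  Codes 0, 2
   and 9 (MII, MFI, MLX) are confirmed by their uses later in this
   file; the remaining names are inferred from the chain order.  */
#if 0
static const char *const toy_template_names[10] =
{
  ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
  ".mbb", ".mib", ".mmb", ".mfb", ".mlx"
};
#endif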
/* The following function returns the first insn important for insn
   bundling in the range from INSN up to (but not including) TAIL.  */
6630 get_next_important_insn (rtx insn, rtx tail)
6632 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6634 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6635 && GET_CODE (PATTERN (insn)) != USE
6636 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* The following function does insn bundling.  Bundling means inserting
   templates and nop insns to fit insn groups into permitted templates.
   Instruction scheduling uses an NDFA (non-deterministic finite
   automaton) encoding information about the templates and the inserted
   nops.  Nondeterminism of the automaton permits following all
   possible insn sequences very quickly.
   Unfortunately it is not possible to get information about the
   inserted nops and the used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented with a DFA (deterministic
   finite automaton).  We follow all possible insn sequences by
   inserting 0-2 nops (that is what the NDFA describes for insn
   scheduling) before/after each insn being bundled.  We know the start
   of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).
   A naive implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the cycle-tick
   information taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops, and implicitly about previous decisions) is
   described by the structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject decisions which cannot improve the solution
   and reduce the memory used by the algorithm.
   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert the
   templates for the best alternative.  The templates are obtained by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  There is an additional forward pass through the EBB for the
   Itanium 1 processor.  This pass inserts more nops to make the
   dependency between a producer insn and MMMUL/MMSHF at least 4 cycles
   long.  */
6681 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6683 struct bundle_state *curr_state, *next_state, *best_state;
6684 rtx insn, next_insn;
6686 int i, bundle_end_p, only_bundle_end_p, asm_p;
6687 int pos = 0, max_pos, template0, template1;
6690 enum attr_type type;
6693 /* Count insns in the EBB. */
6694 for (insn = NEXT_INSN (prev_head_insn);
6695 insn && insn != tail;
6696 insn = NEXT_INSN (insn))
6702 dfa_clean_insn_cache ();
6703 initiate_bundle_state_table ();
6704 index_to_bundle_states = xmalloc ((insn_num + 2)
6705 * sizeof (struct bundle_state *));
6706 /* First (forward) pass -- generation of bundle states. */
6707 curr_state = get_free_bundle_state ();
6708 curr_state->insn = NULL;
6709 curr_state->before_nops_num = 0;
6710 curr_state->after_nops_num = 0;
6711 curr_state->insn_num = 0;
6712 curr_state->cost = 0;
6713 curr_state->accumulated_insns_num = 0;
6714 curr_state->branch_deviation = 0;
6715 curr_state->next = NULL;
6716 curr_state->originator = NULL;
6717 state_reset (curr_state->dfa_state);
6718 index_to_bundle_states [0] = curr_state;
/* Shift the cycle mark if it is put on an insn which could be ignored.  */
6721 for (insn = NEXT_INSN (prev_head_insn);
6723 insn = NEXT_INSN (insn))
6725 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6726 || GET_CODE (PATTERN (insn)) == USE
6727 || GET_CODE (PATTERN (insn)) == CLOBBER)
6728 && GET_MODE (insn) == TImode)
6730 PUT_MODE (insn, VOIDmode);
6731 for (next_insn = NEXT_INSN (insn);
6733 next_insn = NEXT_INSN (next_insn))
6734 if (INSN_P (next_insn)
6735 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6736 && GET_CODE (PATTERN (next_insn)) != USE
6737 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6739 PUT_MODE (next_insn, TImode);
/* Forward pass: generation of bundle states.  */
6744 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6749 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6750 || GET_CODE (PATTERN (insn)) == USE
6751 || GET_CODE (PATTERN (insn)) == CLOBBER)
6753 type = ia64_safe_type (insn);
6754 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6756 index_to_bundle_states [insn_num] = NULL;
6757 for (curr_state = index_to_bundle_states [insn_num - 1];
6759 curr_state = next_state)
6761 pos = curr_state->accumulated_insns_num % 3;
6762 next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
6767 = (next_insn != NULL_RTX
6768 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6769 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6770 /* We may fill up the current bundle if it is the cycle end
6771 without a group barrier. */
6773 = (only_bundle_end_p || next_insn == NULL_RTX
6774 || (GET_MODE (next_insn) == TImode
6775 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6776 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6778 /* We need to insert 2 nops for cases like M_MII. To
6779 guarantee issuing all insns on the same cycle for
6780 Itanium 1, we need to issue 2 nops after the first M
6781 insn (MnnMII where n is a nop insn). */
6782 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6783 && !bundle_end_p && pos == 1))
6784 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6786 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6788 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6791 if (index_to_bundle_states [insn_num] == NULL)
6793 for (curr_state = index_to_bundle_states [insn_num];
6795 curr_state = curr_state->next)
6796 if (verbose >= 2 && dump)
	    /* This structure is taken from the generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change it if a new automaton is
	       added to the .md file.  */
6804 unsigned short one_automaton_state;
6805 unsigned short oneb_automaton_state;
6806 unsigned short two_automaton_state;
6807 unsigned short twob_automaton_state;
6812 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6813 curr_state->unique_num,
6814 (curr_state->originator == NULL
6815 ? -1 : curr_state->originator->unique_num),
6817 curr_state->before_nops_num, curr_state->after_nops_num,
6818 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6819 (ia64_tune == PROCESSOR_ITANIUM
6820 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6821 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6825 if (index_to_bundle_states [insn_num] == NULL)
	/* We should find a solution because the 2nd insn scheduling
	   has found one.  */
6829 /* Find a state corresponding to the best insn sequence. */
6831 for (curr_state = index_to_bundle_states [insn_num];
6833 curr_state = curr_state->next)
    /* We look only at states whose last bundle is fully filled.  First
       we prefer insn sequences with minimal cost, then with the fewest
       inserted nops, and finally with branch insns placed in the 3rd
       slots.  */
6838 if (curr_state->accumulated_insns_num % 3 == 0
6839 && (best_state == NULL || best_state->cost > curr_state->cost
6840 || (best_state->cost == curr_state->cost
6841 && (curr_state->accumulated_insns_num
6842 < best_state->accumulated_insns_num
6843 || (curr_state->accumulated_insns_num
6844 == best_state->accumulated_insns_num
6845 && curr_state->branch_deviation
6846 < best_state->branch_deviation)))))
6847 best_state = curr_state;
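/* Illustrative aside, not part of the original source: the loop above
   is a lexicographic minimum over (cost, accumulated insns, branch
   deviation), restricted to states whose last bundle is full.  The
   same selection, factored into a hypothetical helper:  */
#if 0
static struct bundle_state *
toy_pick_best (struct bundle_state *list)
{
  struct bundle_state *best = NULL;
  struct bundle_state *s;

  for (s = list; s != NULL; s = s->next)
    {
      if (s->accumulated_insns_num % 3 != 0)
        continue;                       /* last bundle not fully filled */
      if (best == NULL
          || s->cost < best->cost
          || (s->cost == best->cost
              && (s->accumulated_insns_num < best->accumulated_insns_num
                  || (s->accumulated_insns_num == best->accumulated_insns_num
                      && s->branch_deviation < best->branch_deviation))))
        best = s;
    }
  return best;
}
#endif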
6848 /* Second (backward) pass: adding nops and templates. */
6849 insn_num = best_state->before_nops_num;
6850 template0 = template1 = -1;
6851 for (curr_state = best_state;
6852 curr_state->originator != NULL;
6853 curr_state = curr_state->originator)
6855 insn = curr_state->insn;
6856 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6857 || asm_noperands (PATTERN (insn)) >= 0);
6859 if (verbose >= 2 && dump)
6863 unsigned short one_automaton_state;
6864 unsigned short oneb_automaton_state;
6865 unsigned short two_automaton_state;
6866 unsigned short twob_automaton_state;
6871 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6872 curr_state->unique_num,
6873 (curr_state->originator == NULL
6874 ? -1 : curr_state->originator->unique_num),
6876 curr_state->before_nops_num, curr_state->after_nops_num,
6877 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6878 (ia64_tune == PROCESSOR_ITANIUM
6879 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6880 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
      /* Find the position in the current bundle window.  The window
	 can contain at most two bundles.  A two-bundle window means
	 that the processor will make two bundle rotations.  */
6886 max_pos = get_max_pos (curr_state->dfa_state);
6888 /* The following (negative template number) means that the
6889 processor did one bundle rotation. */
6890 || (max_pos == 3 && template0 < 0))
	  /* We are at the end of the window -- find template(s) for
	     the bundle(s).  */
6896 template0 = get_template (curr_state->dfa_state, 3);
6899 template1 = get_template (curr_state->dfa_state, 3);
6900 template0 = get_template (curr_state->dfa_state, 6);
6903 if (max_pos > 3 && template1 < 0)
	    /* This may happen when there is a stop inside a bundle.  */
6908 template1 = get_template (curr_state->dfa_state, 3);
6912 /* Emit nops after the current insn. */
6913 for (i = 0; i < curr_state->after_nops_num; i++)
6916 emit_insn_after (nop, insn);
6922 /* We are at the start of a bundle: emit the template
6923 (it should be defined). */
6926 b = gen_bundle_selector (GEN_INT (template0));
6927 ia64_emit_insn_before (b, nop);
	      /* If we have a two-bundle window, we make one bundle
		 rotation.  Otherwise template0 will be undefined
		 (a negative value).  */
6931 template0 = template1;
      /* Move the position backward in the window.  A group barrier has
	 no slot.  An asm insn takes a whole bundle.  */
6937 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6938 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6939 && asm_noperands (PATTERN (insn)) < 0)
	  /* A long insn takes 2 slots.  */
6942 if (ia64_safe_type (insn) == TYPE_L)
6947 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6948 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6949 && asm_noperands (PATTERN (insn)) < 0)
	  /* The current insn is at the bundle start: emit the
	     template.  */
6955 b = gen_bundle_selector (GEN_INT (template0));
6956 ia64_emit_insn_before (b, insn);
6957 b = PREV_INSN (insn);
	  /* See the comment above in the analogous place for emitting
	     nops after the insn.  */
6961 template0 = template1;
      /* Emit nops before the current insn.  */
6965 for (i = 0; i < curr_state->before_nops_num; i++)
6968 ia64_emit_insn_before (nop, insn);
6969 nop = PREV_INSN (insn);
	      /* See the comment above in the analogous place for
		 emitting nops after the insn.  */
6980 b = gen_bundle_selector (GEN_INT (template0));
6981 ia64_emit_insn_before (b, insn);
6982 b = PREV_INSN (insn);
6984 template0 = template1;
6989 if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium 1 has a strange design: if the distance between an insn
       and a dependent MM-insn is less than 4 cycles, we get an
       additional 6-cycle stall.  So we make the distance equal to 4
       cycles if it
6995 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7000 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7001 || GET_CODE (PATTERN (insn)) == USE
7002 || GET_CODE (PATTERN (insn)) == CLOBBER)
7004 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7005 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7006 /* We found a MM-insn which needs additional cycles. */
	  /* Now we search for the template of the bundle in which the
	     MM-insn is placed, and for the position of the insn in the
	     bundle (0, 1, 2).  We also check whether there is a stop
	     before the insn.  */
7016 last = prev_active_insn (insn);
7017 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7019 last = prev_active_insn (last);
7021 for (;; last = prev_active_insn (last))
7022 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7024 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
		  /* The insn is in an MLX bundle.  Change the template
		     to MFI because we will add nops before the insn.
		     This simplifies the subsequent code a lot.  */
7030 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7033 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	      /* Sanity checks: the stop is not at the bundle start,
		 there are no more than 3 insns in the bundle, and the
		 MM-insn is not at the start of a bundle with template
		 MLX.  */
7039 if ((pred_stop_p && n == 0) || n > 2
7040 || (template0 == 9 && n != 0))
7042 /* Put nops after the insn in the bundle. */
7043 for (j = 3 - n; j > 0; j --)
7044 ia64_emit_insn_before (gen_nop (), insn);
	      /* This takes into account that we will add more nops
		 before the insn later on -- please see the code below.  */
7047 add_cycles [INSN_UID (insn)]--;
7048 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7049 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7052 add_cycles [INSN_UID (insn)]--;
7053 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7055 /* Insert "MII;" template. */
7056 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7058 ia64_emit_insn_before (gen_nop (), insn);
7059 ia64_emit_insn_before (gen_nop (), insn);
		    /* To decrease code size, we use an "MI;I;" template.  */
7064 ia64_emit_insn_before
7065 (gen_insn_group_barrier (GEN_INT (3)), insn);
7068 ia64_emit_insn_before (gen_nop (), insn);
7069 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7072 /* Put the MM-insn in the same slot of a bundle with the
7073 same template as the original one. */
7074 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
	      /* To put the insn in the same slot, add the necessary
		 number of nops.  */
7078 for (j = n; j > 0; j --)
7079 ia64_emit_insn_before (gen_nop (), insn);
7080 /* Put the stop if the original bundle had it. */
7082 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7086 free (index_to_bundle_states);
7087 finish_bundle_state_table ();
7089 dfa_clean_insn_cache ();
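/* Illustrative aside, not part of the original source: the control
   structure of the forward pass above, reduced to its skeleton.  Every
   surviving state for position I-1 is extended with 0, 1 or 2 leading
   nops (2 only in the F/B/L and Itanium 1 M cases), and the hash table
   merges duplicate states.  toy_extend_state is hypothetical.  */
#if 0
static void toy_extend_state (struct bundle_state *, int nops, rtx insn);

static void
toy_forward_pass (struct bundle_state **index_to_states, rtx *insns,
                  int insn_count)
{
  int i, nops;
  struct bundle_state *s;

  for (i = 1; i <= insn_count; i++)
    for (s = index_to_states[i - 1]; s != NULL; s = s->next)
      for (nops = 2; nops >= 0; nops--)
        toy_extend_state (s, nops, insns[i - 1]);
}
#endif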
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */
7096 ia64_sched_finish (FILE *dump, int sched_verbose)
7099 fprintf (dump, "// Finishing schedule.\n");
7100 if (!reload_completed)
7102 if (reload_completed)
7104 final_emit_insn_group_barriers (dump);
7105 bundling (dump, sched_verbose, current_sched_info->prev_head,
7106 current_sched_info->next_tail);
7107 if (sched_verbose && dump)
7108 fprintf (dump, "// finishing %d-%d\n",
7109 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7110 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
/* The following function inserts stop bits in a scheduled BB or EBB.  */
7119 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7122 int need_barrier_p = 0;
7123 rtx prev_insn = NULL_RTX;
7125 init_insn_group_barriers ();
7127 for (insn = NEXT_INSN (current_sched_info->prev_head);
7128 insn != current_sched_info->next_tail;
7129 insn = NEXT_INSN (insn))
7131 if (GET_CODE (insn) == BARRIER)
7133 rtx last = prev_active_insn (insn);
7137 if (GET_CODE (last) == JUMP_INSN
7138 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7139 last = prev_active_insn (last);
7140 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7141 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7143 init_insn_group_barriers ();
7145 prev_insn = NULL_RTX;
7147 else if (INSN_P (insn))
7149 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7151 init_insn_group_barriers ();
7153 prev_insn = NULL_RTX;
7155 else if (need_barrier_p || group_barrier_needed_p (insn))
7157 if (TARGET_EARLY_STOP_BITS)
7162 last != current_sched_info->prev_head;
7163 last = PREV_INSN (last))
7164 if (INSN_P (last) && GET_MODE (last) == TImode
7165 && stops_p [INSN_UID (last)])
7167 if (last == current_sched_info->prev_head)
7169 last = prev_active_insn (last);
7171 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7172 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7174 init_insn_group_barriers ();
7175 for (last = NEXT_INSN (last);
7177 last = NEXT_INSN (last))
7179 group_barrier_needed_p (last);
7183 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7185 init_insn_group_barriers ();
7187 group_barrier_needed_p (insn);
7188 prev_insn = NULL_RTX;
7190 else if (recog_memoized (insn) >= 0)
7192 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7193 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7194 || asm_noperands (PATTERN (insn)) >= 0);
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */
7205 ia64_use_dfa_pipeline_interface (void)
/* The following function returns the number of insns the DFA insn
   scheduler examines during first-cycle multipass lookahead (deeper
   after reload).  */
7214 ia64_first_cycle_multipass_dfa_lookahead (void)
7216 return (reload_completed ? 6 : 4);
/* The following function initializes the variable `dfa_pre_cycle_insn'
   (and `dfa_stop_insn') and allocates the temporary DFA state buffers.  */
7222 ia64_init_dfa_pre_cycle_insn (void)
7224 if (temp_dfa_state == NULL)
7226 dfa_state_size = state_size ();
7227 temp_dfa_state = xmalloc (dfa_state_size);
7228 prev_cycle_state = xmalloc (dfa_state_size);
7230 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7231 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7232 recog_memoized (dfa_pre_cycle_insn);
7233 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7234 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7235 recog_memoized (dfa_stop_insn);
7238 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7239 used by the DFA insn scheduler. */
7242 ia64_dfa_pre_cycle_insn (void)
7244 return dfa_pre_cycle_insn;
7247 /* The following function returns TRUE if PRODUCER (of type ilog or
7248 ld) produces address for CONSUMER (of type st or stf). */
7251 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7255 if (producer == NULL_RTX || consumer == NULL_RTX)
7257 dest = ia64_single_set (producer);
7258 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7259 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7261 if (GET_CODE (reg) == SUBREG)
7262 reg = SUBREG_REG (reg);
7263 dest = ia64_single_set (consumer);
7264 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7265 || GET_CODE (mem) != MEM)
7267 return reg_mentioned_p (reg, mem);
7270 /* The following function returns TRUE if PRODUCER (of type ilog or
7271 ld) produces address for CONSUMER (of type ld or fld). */
7274 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7276 rtx dest, src, reg, mem;
7278 if (producer == NULL_RTX || consumer == NULL_RTX)
7280 dest = ia64_single_set (producer);
7281 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7282 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7284 if (GET_CODE (reg) == SUBREG)
7285 reg = SUBREG_REG (reg);
7286 src = ia64_single_set (consumer);
7287 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7289 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7290 mem = XVECEXP (mem, 0, 0);
7291 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7292 mem = XEXP (mem, 0);
7294 /* Note that LO_SUM is used for GOT loads. */
7295 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7298 return reg_mentioned_p (reg, mem);
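/* Illustrative aside, not part of the original source: both bypass
   predicates above boil down to "does the producer's destination
   register appear in the consumer's address?".  A register-number
   model of that test, with hypothetical structs:  */
#if 0
struct toy_dep_insn
{
  int dest_reg;                 /* register written by the producer */
  int addr_reg;                 /* register used in the consumer's address */
};

static int
toy_address_bypass_p (const struct toy_dep_insn *producer,
                      const struct toy_dep_insn *consumer)
{
  return producer->dest_reg == consumer->addr_reg;
}
#endif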
/* The following function returns TRUE if INSN produces an address for
   a load/store insn.  We place such insns into an M slot because that
   decreases the latency of the dependent load/store.  */
7306 ia64_produce_address_p (rtx insn)
7312 /* Emit pseudo-ops for the assembler to describe predicate relations.
7313 At present this assumes that we only consider predicate pairs to
7314 be mutex, and that the assembler can deduce proper values from
7315 straight-line code. */
7318 emit_predicate_relation_info (void)
7322 FOR_EACH_BB_REVERSE (bb)
7325 rtx head = BB_HEAD (bb);
7327 /* We only need such notes at code labels. */
7328 if (GET_CODE (head) != CODE_LABEL)
7330 if (GET_CODE (NEXT_INSN (head)) == NOTE
7331 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7332 head = NEXT_INSN (head);
7334 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7335 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7337 rtx p = gen_rtx_REG (BImode, r);
7338 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7339 if (head == BB_END (bb))
  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
7349 FOR_EACH_BB_REVERSE (bb)
7351 rtx insn = BB_HEAD (bb);
7355 if (GET_CODE (insn) == CALL_INSN
7356 && GET_CODE (PATTERN (insn)) == COND_EXEC
7357 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7359 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7360 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7361 if (BB_HEAD (bb) == insn)
7363 if (BB_END (bb) == insn)
7367 if (insn == BB_END (bb))
7369 insn = NEXT_INSN (insn);
7374 /* Perform machine dependent operations on the rtl chain INSNS. */
7379 /* We are freeing block_for_insn in the toplev to keep compatibility
7380 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7381 compute_bb_for_insn ();
7383 /* If optimizing, we'll have split before scheduling. */
7385 split_all_insns (0);
7387 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7388 non-optimizing bootstrap. */
7389 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7391 if (ia64_flag_schedule_insns2)
7393 timevar_push (TV_SCHED2);
7394 ia64_final_schedule = 1;
7396 initiate_bundle_states ();
7397 ia64_nop = make_insn_raw (gen_nop ());
7398 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7399 recog_memoized (ia64_nop);
7400 clocks_length = get_max_uid () + 1;
7401 stops_p = xcalloc (1, clocks_length);
7402 if (ia64_tune == PROCESSOR_ITANIUM)
7404 clocks = xcalloc (clocks_length, sizeof (int));
7405 add_cycles = xcalloc (clocks_length, sizeof (int));
7407 if (ia64_tune == PROCESSOR_ITANIUM2)
7409 pos_1 = get_cpu_unit_code ("2_1");
7410 pos_2 = get_cpu_unit_code ("2_2");
7411 pos_3 = get_cpu_unit_code ("2_3");
7412 pos_4 = get_cpu_unit_code ("2_4");
7413 pos_5 = get_cpu_unit_code ("2_5");
7414 pos_6 = get_cpu_unit_code ("2_6");
7415 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7416 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7417 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7418 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7419 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7420 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7421 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7422 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7423 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7424 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7425 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7426 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7427 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7428 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7429 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7430 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7431 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7432 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7433 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7434 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7438 pos_1 = get_cpu_unit_code ("1_1");
7439 pos_2 = get_cpu_unit_code ("1_2");
7440 pos_3 = get_cpu_unit_code ("1_3");
7441 pos_4 = get_cpu_unit_code ("1_4");
7442 pos_5 = get_cpu_unit_code ("1_5");
7443 pos_6 = get_cpu_unit_code ("1_6");
7444 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7445 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7446 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7447 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7448 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7449 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7450 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7451 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7452 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7453 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7454 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7455 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7456 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7457 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7458 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7459 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7460 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7461 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7462 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7463 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7465 schedule_ebbs (rtl_dump_file);
7466 finish_bundle_states ();
7467 if (ia64_tune == PROCESSOR_ITANIUM)
7473 emit_insn_group_barriers (rtl_dump_file);
7475 ia64_final_schedule = 0;
7476 timevar_pop (TV_SCHED2);
7479 emit_all_insn_group_barriers (rtl_dump_file);
  /* A call must not be the last instruction in a function, so that the
     return address is still within the function and unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7484 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7489 insn = get_last_insn ();
7490 if (! INSN_P (insn))
7491 insn = prev_active_insn (insn);
7492 if (GET_CODE (insn) == INSN
7493 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7494 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7497 insn = prev_active_insn (insn);
7499 if (GET_CODE (insn) == CALL_INSN)
7502 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7503 emit_insn (gen_break_f ());
7504 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7509 emit_predicate_relation_info ();
7512 /* Return true if REGNO is used by the epilogue. */
7515 ia64_epilogue_uses (int regno)
7520 /* With a call to a function in another module, we will write a new
7521 value to "gp". After returning from such a call, we need to make
7522 sure the function restores the original gp-value, even if the
7523 function itself does not use the gp anymore. */
7524 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7526 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7527 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7528 /* For functions defined with the syscall_linkage attribute, all
7529 input registers are marked as live at all function exits. This
7530 prevents the register allocator from using the input registers,
7531 which in turn makes it possible to restart a system call after
7532 an interrupt without having to save/restore the input registers.
7533 This also prevents kernel data from leaking to application code. */
7534 return lookup_attribute ("syscall_linkage",
7535 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7538 /* Conditional return patterns can't represent the use of `b0' as
7539 the return address, so we force the value live this way. */
7543 /* Likewise for ar.pfs, which is used by br.ret. */
7551 /* Return true if REGNO is used by the frame unwinder. */
7554 ia64_eh_uses (int regno)
7556 if (! reload_completed)
7559 if (current_frame_info.reg_save_b0
7560 && regno == current_frame_info.reg_save_b0)
7562 if (current_frame_info.reg_save_pr
7563 && regno == current_frame_info.reg_save_pr)
7565 if (current_frame_info.reg_save_ar_pfs
7566 && regno == current_frame_info.reg_save_ar_pfs)
7568 if (current_frame_info.reg_save_ar_unat
7569 && regno == current_frame_info.reg_save_ar_unat)
7571 if (current_frame_info.reg_save_ar_lc
7572 && regno == current_frame_info.reg_save_ar_lc)
7578 /* Return true if this goes in small data/bss. */
/* ??? We could also support our own long data here, generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger,
   but should make the code faster because there is one less load.
   This would also cover incomplete types which can't go in
   sdata/sbss.
7586 ia64_in_small_data_p (tree exp)
7588 if (TARGET_NO_SDATA)
7591 /* We want to merge strings, so we never consider them small data. */
7592 if (TREE_CODE (exp) == STRING_CST)
7595 /* Functions are never small data. */
7596 if (TREE_CODE (exp) == FUNCTION_DECL)
7599 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7601 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7602 if (strcmp (section, ".sdata") == 0
7603 || strcmp (section, ".sbss") == 0)
7608 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7610 /* If this is an incomplete type with size 0, then we can't put it
7611 in sdata because it might be too big when completed. */
7612 if (size > 0 && size <= ia64_section_threshold)
7619 /* Output assembly directives for prologue regions. */
7621 /* The current basic block number. */
7623 static bool last_block;
7625 /* True if we need a copy_state command at the start of the next block. */
7627 static bool need_copy_state;
7629 /* The function emits unwind directives for the start of an epilogue. */
7632 process_epilogue (void)
7634 /* If this isn't the last block of the function, then we need to label the
7635 current state, and copy it back in at the start of the next block. */
7639 fprintf (asm_out_file, "\t.label_state 1\n");
7640 need_copy_state = true;
7643 fprintf (asm_out_file, "\t.restore sp\n");
7646 /* This function processes a SET pattern looking for specific patterns
7647 which result in emitting an assembly directive required for unwinding. */
7650 process_set (FILE *asm_out_file, rtx pat)
7652 rtx src = SET_SRC (pat);
7653 rtx dest = SET_DEST (pat);
7654 int src_regno, dest_regno;
7656 /* Look for the ALLOC insn. */
7657 if (GET_CODE (src) == UNSPEC_VOLATILE
7658 && XINT (src, 1) == UNSPECV_ALLOC
7659 && GET_CODE (dest) == REG)
7661 dest_regno = REGNO (dest);
7663 /* If this isn't the final destination for ar.pfs, the alloc
7664 shouldn't have been marked frame related. */
7665 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7668 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7669 ia64_dbx_register_number (dest_regno));
7673 /* Look for SP = .... */
7674 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7676 if (GET_CODE (src) == PLUS)
7678 rtx op0 = XEXP (src, 0);
7679 rtx op1 = XEXP (src, 1);
7680 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7682 if (INTVAL (op1) < 0)
7683 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7686 process_epilogue ();
7691 else if (GET_CODE (src) == REG
7692 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7693 process_epilogue ();
7700 /* Register move we need to look at. */
7701 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7703 src_regno = REGNO (src);
7704 dest_regno = REGNO (dest);
7709 /* Saving return address pointer. */
7710 if (dest_regno != current_frame_info.reg_save_b0)
7712 fprintf (asm_out_file, "\t.save rp, r%d\n",
7713 ia64_dbx_register_number (dest_regno));
7717 if (dest_regno != current_frame_info.reg_save_pr)
7719 fprintf (asm_out_file, "\t.save pr, r%d\n",
7720 ia64_dbx_register_number (dest_regno));
7723 case AR_UNAT_REGNUM:
7724 if (dest_regno != current_frame_info.reg_save_ar_unat)
7726 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7727 ia64_dbx_register_number (dest_regno));
7731 if (dest_regno != current_frame_info.reg_save_ar_lc)
7733 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7734 ia64_dbx_register_number (dest_regno));
7737 case STACK_POINTER_REGNUM:
7738 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7739 || ! frame_pointer_needed)
7741 fprintf (asm_out_file, "\t.vframe r%d\n",
7742 ia64_dbx_register_number (dest_regno));
7746 /* Everything else should indicate being stored to memory. */
7751 /* Memory store we need to look at. */
7752 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7758 if (GET_CODE (XEXP (dest, 0)) == REG)
7760 base = XEXP (dest, 0);
7763 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7764 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7766 base = XEXP (XEXP (dest, 0), 0);
7767 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7772 if (base == hard_frame_pointer_rtx)
7774 saveop = ".savepsp";
7777 else if (base == stack_pointer_rtx)
7782 src_regno = REGNO (src);
7786 if (current_frame_info.reg_save_b0 != 0)
7788 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7792 if (current_frame_info.reg_save_pr != 0)
7794 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7798 if (current_frame_info.reg_save_ar_lc != 0)
7800 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7804 if (current_frame_info.reg_save_ar_pfs != 0)
7806 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7809 case AR_UNAT_REGNUM:
7810 if (current_frame_info.reg_save_ar_unat != 0)
7812 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7819 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7820 1 << (src_regno - GR_REG (4)));
7828 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7829 1 << (src_regno - BR_REG (1)));
7836 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7837 1 << (src_regno - FR_REG (2)));
7840 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7841 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7842 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7843 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7844 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7845 1 << (src_regno - FR_REG (12)));
7857 /* This function looks at a single insn and emits any directives
7858 required to unwind this insn. */
7860 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7862 if (flag_unwind_tables
7863 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7867 if (GET_CODE (insn) == NOTE
7868 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7870 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7872 /* Restore unwind state from immediately before the epilogue. */
7873 if (need_copy_state)
7875 fprintf (asm_out_file, "\t.body\n");
7876 fprintf (asm_out_file, "\t.copy_state 1\n");
7877 need_copy_state = false;
7881 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7884 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7886 pat = XEXP (pat, 0);
7888 pat = PATTERN (insn);
7890 switch (GET_CODE (pat))
7893 process_set (asm_out_file, pat);
7899 int limit = XVECLEN (pat, 0);
7900 for (par_index = 0; par_index < limit; par_index++)
7902 rtx x = XVECEXP (pat, 0, par_index);
7903 if (GET_CODE (x) == SET)
7904 process_set (asm_out_file, x);
7917 ia64_init_builtins (void)
7919 tree psi_type_node = build_pointer_type (integer_type_node);
7920 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7922 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7923 tree si_ftype_psi_si_si
7924 = build_function_type_list (integer_type_node,
7925 psi_type_node, integer_type_node,
7926 integer_type_node, NULL_TREE);
7928 /* __sync_val_compare_and_swap_di */
7929 tree di_ftype_pdi_di_di
7930 = build_function_type_list (long_integer_type_node,
7931 pdi_type_node, long_integer_type_node,
7932 long_integer_type_node, NULL_TREE);
7933 /* __sync_bool_compare_and_swap_di */
7934 tree si_ftype_pdi_di_di
7935 = build_function_type_list (integer_type_node,
7936 pdi_type_node, long_integer_type_node,
7937 long_integer_type_node, NULL_TREE);
7938 /* __sync_synchronize */
7939 tree void_ftype_void
7940 = build_function_type (void_type_node, void_list_node);
7942 /* __sync_lock_test_and_set_si */
7943 tree si_ftype_psi_si
7944 = build_function_type_list (integer_type_node,
7945 psi_type_node, integer_type_node, NULL_TREE);
7947 /* __sync_lock_test_and_set_di */
7948 tree di_ftype_pdi_di
7949 = build_function_type_list (long_integer_type_node,
7950 pdi_type_node, long_integer_type_node,
7953 /* __sync_lock_release_si */
7955 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7957 /* __sync_lock_release_di */
7959 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7964 /* The __fpreg type. */
7965 fpreg_type = make_node (REAL_TYPE);
7966 /* ??? The back end should know to load/save __fpreg variables using
7967 the ldf.fill and stf.spill instructions. */
7968 TYPE_PRECISION (fpreg_type) = 96;
7969 layout_type (fpreg_type);
7970 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7972 /* The __float80 type. */
7973 float80_type = make_node (REAL_TYPE);
7974 TYPE_PRECISION (float80_type) = 96;
7975 layout_type (float80_type);
7976 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7978 /* The __float128 type. */
7981 tree float128_type = make_node (REAL_TYPE);
7982 TYPE_PRECISION (float128_type) = 128;
7983 layout_type (float128_type);
7984 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7987 /* Under HPUX, this is a synonym for "long double". */
7988 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7991 #define def_builtin(name, type, code) \
7992 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7994 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7995 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7996 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7997 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7998 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7999 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
8000 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
8001 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
8003 def_builtin ("__sync_synchronize", void_ftype_void,
8004 IA64_BUILTIN_SYNCHRONIZE);
8006 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8007 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8008 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8009 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8010 def_builtin ("__sync_lock_release_si", void_ftype_psi,
8011 IA64_BUILTIN_LOCK_RELEASE_SI);
8012 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8013 IA64_BUILTIN_LOCK_RELEASE_DI);
8015 def_builtin ("__builtin_ia64_bsp",
8016 build_function_type (ptr_type_node, void_list_node),
8019 def_builtin ("__builtin_ia64_flushrs",
8020 build_function_type (void_type_node, void_list_node),
8021 IA64_BUILTIN_FLUSHRS);
8023 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8024 IA64_BUILTIN_FETCH_AND_ADD_SI);
8025 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8026 IA64_BUILTIN_FETCH_AND_SUB_SI);
8027 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8028 IA64_BUILTIN_FETCH_AND_OR_SI);
8029 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8030 IA64_BUILTIN_FETCH_AND_AND_SI);
8031 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8032 IA64_BUILTIN_FETCH_AND_XOR_SI);
8033 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8034 IA64_BUILTIN_FETCH_AND_NAND_SI);
8036 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8037 IA64_BUILTIN_ADD_AND_FETCH_SI);
8038 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8039 IA64_BUILTIN_SUB_AND_FETCH_SI);
8040 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8041 IA64_BUILTIN_OR_AND_FETCH_SI);
8042 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8043 IA64_BUILTIN_AND_AND_FETCH_SI);
8044 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8045 IA64_BUILTIN_XOR_AND_FETCH_SI);
8046 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8047 IA64_BUILTIN_NAND_AND_FETCH_SI);
8049 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8050 IA64_BUILTIN_FETCH_AND_ADD_DI);
8051 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8052 IA64_BUILTIN_FETCH_AND_SUB_DI);
8053 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8054 IA64_BUILTIN_FETCH_AND_OR_DI);
8055 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8056 IA64_BUILTIN_FETCH_AND_AND_DI);
8057 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8058 IA64_BUILTIN_FETCH_AND_XOR_DI);
8059 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8060 IA64_BUILTIN_FETCH_AND_NAND_DI);
8062 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8063 IA64_BUILTIN_ADD_AND_FETCH_DI);
8064 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8065 IA64_BUILTIN_SUB_AND_FETCH_DI);
8066 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8067 IA64_BUILTIN_OR_AND_FETCH_DI);
8068 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8069 IA64_BUILTIN_AND_AND_FETCH_DI);
8070 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8071 IA64_BUILTIN_XOR_AND_FETCH_DI);
8072 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8073 IA64_BUILTIN_NAND_AND_FETCH_DI);
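/* Illustrative aside, not part of the original source: a user-level
   view of some of the builtins registered above, as ordinary C that an
   IA-64 GCC of this vintage should accept.  A hedged sketch, not part
   of any testsuite:  */
#if 0
static int toy_counter;

static int
toy_bump_counter (void)
{
  int old = __sync_fetch_and_add_si (&toy_counter, 1);  /* atomic ++ */
  __sync_synchronize ();                                /* mf fence */
  return old;
}
#endif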
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf; tmp = [ptr];
     do { ret = tmp; ar.ccv = tmp; tmp <op>= value;
          cmpxchgsz.acq tmp = [ptr], tmp; } while (tmp != ret)  */
8091 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8092 tree arglist, rtx target)
8094 rtx ret, label, tmp, ccv, insn, mem, value;
8097 arg0 = TREE_VALUE (arglist);
8098 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8099 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8100 #ifdef POINTERS_EXTEND_UNSIGNED
8101 if (GET_MODE(mem) != Pmode)
8102 mem = convert_memory_address (Pmode, mem);
8104 value = expand_expr (arg1, NULL_RTX, mode, 0);
8106 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8107 MEM_VOLATILE_P (mem) = 1;
8109 if (target && register_operand (target, mode))
8112 ret = gen_reg_rtx (mode);
8114 emit_insn (gen_mf ());
8116 /* Special case for fetchadd instructions. */
8117 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8120 insn = gen_fetchadd_acq_si (ret, mem, value);
8122 insn = gen_fetchadd_acq_di (ret, mem, value);
8127 tmp = gen_reg_rtx (mode);
8128 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8129 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8130 emit_move_insn (tmp, mem);
8132 label = gen_label_rtx ();
8134 emit_move_insn (ret, tmp);
8135 convert_move (ccv, tmp, /*unsignedp=*/1);
  /* Perform the specific operation.  NAND is special-cased: it arrives
     flagged as one_cmpl_optab and is expanded as ~tmp & value.  */
8139 if (binoptab == one_cmpl_optab)
8141 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8142 binoptab = and_optab;
8144 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8147 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8149 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8152 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
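/* Illustrative aside, not part of the original source: the loop emitted
   above, written back as the C it implements, using the
   compare-and-swap builtin this file also provides.  A sketch for the
   "add" case when fetchadd does not apply:  */
#if 0
static int
toy_fetch_and_add (int *ptr, int value)
{
  int tmp = *ptr;
  int ret;

  do
    {
      ret = tmp;                /* candidate old value, loaded into ar.ccv */
      tmp = __sync_val_compare_and_swap_si (ptr, ret, ret + value);
    }
  while (tmp != ret);           /* retry if another thread raced us */
  return ret;                   /* fetch_and_op returns the old value */
}
#endif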
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf; tmp = [ptr];
     do { old = tmp; ar.ccv = tmp; ret = tmp <op> value;
          cmpxchgsz.acq tmp = [ptr], ret; } while (tmp != old)  */
8170 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8171 tree arglist, rtx target)
8173 rtx old, label, tmp, ret, ccv, insn, mem, value;
8176 arg0 = TREE_VALUE (arglist);
8177 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8178 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8179 #ifdef POINTERS_EXTEND_UNSIGNED
8180 if (GET_MODE(mem) != Pmode)
8181 mem = convert_memory_address (Pmode, mem);
8184 value = expand_expr (arg1, NULL_RTX, mode, 0);
8186 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8187 MEM_VOLATILE_P (mem) = 1;
8189 if (target && ! register_operand (target, mode))
8192 emit_insn (gen_mf ());
8193 tmp = gen_reg_rtx (mode);
8194 old = gen_reg_rtx (mode);
8195 /* ar.ccv must always be loaded with a zero-extended DImode value. */
8196 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8198 emit_move_insn (tmp, mem);
8200 label = gen_label_rtx ();
8202 emit_move_insn (old, tmp);
8203 convert_move (ccv, tmp, /*unsignedp=*/1);
  /* Perform the specific operation.  NAND is special-cased: it arrives
     flagged as one_cmpl_optab and is expanded as ~tmp & value.  */
8207 if (binoptab == one_cmpl_optab)
8209 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8210 binoptab = and_optab;
8212 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8215 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8217 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8220 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval; mf;
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except we return ret == oldval.  */
8236 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8237 int boolp, tree arglist, rtx target)
8239 tree arg0, arg1, arg2;
8240 rtx mem, old, new, ccv, tmp, insn;
8242 arg0 = TREE_VALUE (arglist);
8243 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8244 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8245 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8246 old = expand_expr (arg1, NULL_RTX, mode, 0);
8247 new = expand_expr (arg2, NULL_RTX, mode, 0);
8249 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8250 MEM_VOLATILE_P (mem) = 1;
8252 if (GET_MODE (old) != mode)
8253 old = convert_to_mode (mode, old, /*unsignedp=*/1);
8254 if (GET_MODE (new) != mode)
8255 new = convert_to_mode (mode, new, /*unsignedp=*/1);
8257 if (! register_operand (old, mode))
8258 old = copy_to_mode_reg (mode, old);
8259 if (! register_operand (new, mode))
8260 new = copy_to_mode_reg (mode, new);
8262 if (! boolp && target && register_operand (target, mode))
8265 tmp = gen_reg_rtx (mode);
8267 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8268 convert_move (ccv, old, /*unsignedp=*/1);
8269 emit_insn (gen_mf ());
8271 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8273 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8279 target = gen_reg_rtx (rmode);
8280 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
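/* Illustrative aside, not part of the original source: the bool_
   variant returns ret == oldval, which makes optimistic updates easy
   to write.  A hedged usage sketch:  */
#if 0
static void
toy_atomic_double (int *ptr)
{
  int old;

  do
    old = *ptr;
  while (! __sync_bool_compare_and_swap_si (ptr, old, 2 * old));
}
#endif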
8286 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8289 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8293 rtx mem, new, ret, insn;
8295 arg0 = TREE_VALUE (arglist);
8296 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8297 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8298 new = expand_expr (arg1, NULL_RTX, mode, 0);
8300 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8301 MEM_VOLATILE_P (mem) = 1;
8302 if (! register_operand (new, mode))
8303 new = copy_to_mode_reg (mode, new);
8305 if (target && register_operand (target, mode))
8308 ret = gen_reg_rtx (mode);
8311 insn = gen_xchgsi (ret, mem, new);
8313 insn = gen_xchgdi (ret, mem, new);
8319 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8322 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8323 rtx target ATTRIBUTE_UNUSED)
8328 arg0 = TREE_VALUE (arglist);
8329 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8331 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8332 MEM_VOLATILE_P (mem) = 1;
8334 emit_move_insn (mem, const0_rtx);
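/* Illustrative aside, not part of the original source: lock_test_and_set
   expands to an xchg with acquire semantics and lock_release to a plain
   release store of zero, which is the classic spinlock pairing:  */
#if 0
static volatile int toy_lock;

static void
toy_acquire (void)
{
  while (__sync_lock_test_and_set_si ((int *) &toy_lock, 1) != 0)
    ;                           /* spin until the old value was 0 */
}

static void
toy_release (void)
{
  __sync_lock_release_si ((int *) &toy_lock);   /* stsz.rel [ptr] = r0 */
}
#endif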
8340 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8341 enum machine_mode mode ATTRIBUTE_UNUSED,
8342 int ignore ATTRIBUTE_UNUSED)
8344 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8345 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8346 tree arglist = TREE_OPERAND (exp, 1);
8347 enum machine_mode rmode = VOIDmode;
8351 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8352 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8357 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8358 case IA64_BUILTIN_LOCK_RELEASE_SI:
8359 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8360 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8361 case IA64_BUILTIN_FETCH_AND_OR_SI:
8362 case IA64_BUILTIN_FETCH_AND_AND_SI:
8363 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8364 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8365 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8366 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8367 case IA64_BUILTIN_OR_AND_FETCH_SI:
8368 case IA64_BUILTIN_AND_AND_FETCH_SI:
8369 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8370 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8374 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8379 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8384 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8385 case IA64_BUILTIN_LOCK_RELEASE_DI:
8386 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8387 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8388 case IA64_BUILTIN_FETCH_AND_OR_DI:
8389 case IA64_BUILTIN_FETCH_AND_AND_DI:
8390 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8391 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8392 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8393 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8394 case IA64_BUILTIN_OR_AND_FETCH_DI:
8395 case IA64_BUILTIN_AND_AND_FETCH_DI:
8396 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8397 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8407 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8408 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8409 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8412 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8413 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8414 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8417 case IA64_BUILTIN_SYNCHRONIZE:
8418 emit_insn (gen_mf ());
8421 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8422 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8423 return ia64_expand_lock_test_and_set (mode, arglist, target);
8425 case IA64_BUILTIN_LOCK_RELEASE_SI:
8426 case IA64_BUILTIN_LOCK_RELEASE_DI:
8427 return ia64_expand_lock_release (mode, arglist, target);
8429 case IA64_BUILTIN_BSP:
8430 if (! target || ! register_operand (target, DImode))
8431 target = gen_reg_rtx (DImode);
8432 emit_insn (gen_bsp_value (target));
8433 #ifdef POINTERS_EXTEND_UNSIGNED
8434 target = convert_memory_address (ptr_mode, target);
8438 case IA64_BUILTIN_FLUSHRS:
8439 emit_insn (gen_flushrs ());
8442 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8443 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8444 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8446 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8447 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8448 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8450 case IA64_BUILTIN_FETCH_AND_OR_SI:
8451 case IA64_BUILTIN_FETCH_AND_OR_DI:
8452 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8454 case IA64_BUILTIN_FETCH_AND_AND_SI:
8455 case IA64_BUILTIN_FETCH_AND_AND_DI:
8456 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8458 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8459 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8460 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8462 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8463 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8464 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8466 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8467 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8468 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8470 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8471 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8472 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8474 case IA64_BUILTIN_OR_AND_FETCH_SI:
8475 case IA64_BUILTIN_OR_AND_FETCH_DI:
8476 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8478 case IA64_BUILTIN_AND_AND_FETCH_SI:
8479 case IA64_BUILTIN_AND_AND_FETCH_DI:
8480 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8482 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8483 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8484 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8486 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8487 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8488 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */
8501 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8503 /* Exception to normal case for structures/unions/etc. */
8505 if (type && AGGREGATE_TYPE_P (type)
8506 && int_size_in_bytes (type) < UNITS_PER_WORD)
8509 /* Fall back to the default. */
8510 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8513 /* Linked list of all external functions that are to be emitted by GCC.
8514 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8515 order to avoid putting out names that are never really used. */
8517 struct extern_func_list
8519 struct extern_func_list *next; /* next external */
8520 char *name; /* name of the external */
8521 } *extern_func_head = 0;
8524 ia64_hpux_add_extern_decl (const char *name)
8526 struct extern_func_list *p;
8528 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8529 p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
8531 p->next = extern_func_head;
8532 extern_func_head = p;
/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (*targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
          || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
        {
          if (decl)
            TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file,
                                              extern_func_head->name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, extern_func_head->name);
          putc (',', asm_out_file);
          fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
          putc ('\n', asm_out_file);
        }
      extern_func_head = extern_func_head->next;
    }
}
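/* Sketch of the effect (assuming ELF-style TYPE_ASM_OP and
   TYPE_OPERAND_FMT definitions): for an external function `foo' whose
   identifier was symbol-referenced but never assembled in this unit,
   the loop above emits roughly

       .global foo
       .type foo,@function

   while externals whose names were never really referenced are
   silently skipped.  */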
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
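/* Illustrative only: with these libfuncs registered, a quad-precision
   HP-UX compilation such as

       long double sum (long double a, long double b) { return a + b; }

   calls _U_Qfadd instead of the default __addtf3, and a conversion
   like (double) q goes through _U_Qfcnvff_quad_to_dbl, matching the
   naming of the HP-UX quad-float support library.  */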
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}
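/* Illustrative only: under this renaming a plain signed 32-bit
   division

       int quot (int a, int b) { return a / b; }

   expands on VMS into a call to the OTS$DIV_I run-time routine rather
   than GCC's default __divsi3.  */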
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
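/* For instance (a sketch, assuming the usual default small-data
   threshold of 8 bytes): an 8-byte DImode constant-pool entry is sent
   to .sdata, where it can be reached with a short gp-relative address,
   while a 16-byte TFmode constant falls through to the default ELF
   section selection.  */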
/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
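/* Net effect (illustrative): a constant object whose initializer needs
   a load-time relocation, e.g.

       extern int x;
       const int *const p = &x;

   is sectioned as if -fpic were in force, so it lands in a writable,
   relocatable data section rather than read-only .rodata, since the
   dynamic loader must be able to patch it inside a shared segment.  */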
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));
  if (TARGET_ILP32)
    {
      /* Under ILP32 the incoming pointer is 32 bits wide and must be
         extended to a 64-bit Pmode value first.  */
      rtx tmp = gen_rtx_REG (ptr_mode, IN_REG (0));
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          /* Fold the delta into the extension when it fits the
             14-bit "I" add immediate.  */
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
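/* Illustrative only: the thunk emitted above behaves like the
   following pseudo-C for a C++ method `f' reached through multiple or
   virtual inheritance:

       adjusted = this + DELTA;
       if (VCALL_OFFSET != 0)
         adjusted += *(ptrdiff_t *) (*(char **) adjusted + VCALL_OFFSET);
       return f (adjusted, ...);     /- emitted as a tail call -/

   i.e. it fixes up the incoming `this' pointer and branches straight
   to the target function, sharing that function's return.  */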
#include "gt-ia64.h"